Repository: nv-tlabs/kimodo Branch: main Commit: 54257dd8ff18 Files: 320 Total size: 1.6 MB Directory structure: gitextract__xaaoo6k/ ├── .gitignore ├── .pre-commit-config.yaml ├── ATTRIBUTIONS.MD ├── CHANGELOG.md ├── CONTRIBUTING.MD ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── MotionCorrection/ │ ├── .gitignore │ ├── CMakeLists.txt │ ├── MANIFEST.in │ ├── README.md │ ├── python/ │ │ └── motion_correction/ │ │ ├── __init__.py │ │ └── motion_postprocess.py │ ├── run_test.py │ ├── setup.py │ └── src/ │ └── cpp/ │ ├── AnimProcessing/ │ │ ├── InverseKinematics.cpp │ │ ├── InverseKinematics.h │ │ ├── TrajectoryCorrector.cpp │ │ ├── TrajectoryCorrector.h │ │ ├── Utility.cpp │ │ └── Utility.h │ ├── BindingsPython.cpp │ ├── Compiler.h │ ├── Debug.h │ ├── Math/ │ │ ├── Constants.h │ │ ├── Matrix.cpp │ │ ├── Matrix.h │ │ ├── Matrix.inl │ │ ├── Quaternion.cpp │ │ ├── Quaternion.h │ │ ├── Quaternion.inl │ │ ├── SIMD.h │ │ ├── Scalar.h │ │ ├── Transform.cpp │ │ ├── Transform.h │ │ ├── Transform.inl │ │ ├── Types.cpp │ │ ├── Types.h │ │ ├── Vector.cpp │ │ ├── Vector.h │ │ └── Vector.inl │ └── Platform.h ├── README.md ├── benchmark/ │ ├── create_benchmark.py │ ├── embed_folder.py │ ├── evaluate_folder.py │ ├── generate_eval.py │ └── parse_folder.py ├── docker-compose.yaml ├── docker_requirements.in ├── docker_requirements.txt ├── docs/ │ ├── .gitattributes │ ├── Makefile │ ├── README.md │ ├── make.bat │ ├── requirements.txt │ └── source/ │ ├── _static/ │ │ └── custom.css │ ├── _templates/ │ │ └── apidoc/ │ │ ├── module.rst.jinja │ │ └── package.rst.jinja │ ├── api_reference/ │ │ ├── _generated/ │ │ │ ├── kimodo.demo.rst │ │ │ ├── kimodo.exports.rst │ │ │ ├── kimodo.metrics.rst │ │ │ ├── kimodo.model.llm2vec.models.rst │ │ │ ├── kimodo.model.llm2vec.rst │ │ │ ├── kimodo.model.rst │ │ │ ├── kimodo.motion_rep.reps.rst │ │ │ ├── kimodo.motion_rep.rst │ │ │ ├── kimodo.rst │ │ │ ├── kimodo.scripts.rst │ │ │ ├── kimodo.skeleton.rst │ │ │ ├── kimodo.viz.rst │ │ │ └── modules.rst │ │ ├── constraints.rst │ │ ├── exports.rst │ │ ├── index.rst │ │ ├── model.rst │ │ ├── motion_rep.rst │ │ ├── post-processing.rst │ │ ├── utilities.rst │ │ └── viz.rst │ ├── benchmark/ │ │ ├── introduction.md │ │ ├── metrics.md │ │ ├── pipeline.md │ │ └── results.md │ ├── conf.py │ ├── getting_started/ │ │ ├── installation.md │ │ ├── installation_docker.md │ │ ├── installation_smpl.md │ │ ├── installation_virtual_env.md │ │ └── quick_start.md │ ├── index.md │ ├── interactive_demo/ │ │ ├── constraints.md │ │ ├── examples.md │ │ ├── export_results.md │ │ ├── generation.md │ │ ├── index.md │ │ ├── launching.md │ │ ├── model_selection.md │ │ └── ui_overview.md │ ├── key_concepts/ │ │ ├── constraints.md │ │ ├── limitations.md │ │ ├── model.md │ │ ├── motion_representation.md │ │ └── skeleton.md │ ├── project_info.md │ ├── project_structure.md │ └── user_guide/ │ ├── cli.md │ ├── configuration.md │ ├── constraints.md │ ├── motion_convert.md │ ├── output_formats.md │ └── seed_dataset.md ├── kimodo/ │ ├── __init__.py │ ├── assets/ │ │ ├── demo/ │ │ │ └── examples/ │ │ │ ├── kimodo-g1-rp/ │ │ │ │ ├── 01_single_text_prompt/ │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ ├── 02_multi_text_ee_constraint/ │ │ │ │ │ ├── constraints.json │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ ├── 03_full_body_keyframes/ │ │ │ │ │ ├── constraints.json │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ ├── 04_ee_constraint/ │ │ │ │ │ ├── constraints.json │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ ├── 05_root_path/ │ │ │ 
│ │ ├── constraints.json │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ ├── 06_root_waypoints/ │ │ │ │ │ ├── constraints.json │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ ├── 07_text_terrain/ │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ └── 08_text_object/ │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ └── kimodo-soma-rp/ │ │ │ ├── 01_single_text_prompt/ │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ ├── 02_multi_text_prompt/ │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ ├── 03_full_body_keyframes/ │ │ │ │ ├── constraints.json │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ ├── 04_ee_constraint/ │ │ │ │ ├── constraints.json │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ ├── 05_root_path/ │ │ │ │ ├── constraints.json │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ ├── 06_root_waypoints/ │ │ │ │ ├── constraints.json │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ ├── 07_mixed_constraints/ │ │ │ │ ├── constraints.json │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ └── 08_stylized_text/ │ │ │ ├── meta.json │ │ │ └── motion.npz │ │ └── skeletons/ │ │ ├── g1skel34/ │ │ │ ├── joints.p │ │ │ ├── meshes/ │ │ │ │ └── g1/ │ │ │ │ ├── head_link.STL │ │ │ │ ├── left_ankle_pitch_link.STL │ │ │ │ ├── left_ankle_roll_link.STL │ │ │ │ ├── left_elbow_link.STL │ │ │ │ ├── left_hand_index_0_link.STL │ │ │ │ ├── left_hand_index_1_link.STL │ │ │ │ ├── left_hand_middle_0_link.STL │ │ │ │ ├── left_hand_middle_1_link.STL │ │ │ │ ├── left_hand_palm_link.STL │ │ │ │ ├── left_hand_thumb_0_link.STL │ │ │ │ ├── left_hand_thumb_1_link.STL │ │ │ │ ├── left_hand_thumb_2_link.STL │ │ │ │ ├── left_hip_pitch_link.STL │ │ │ │ ├── left_hip_roll_link.STL │ │ │ │ ├── left_hip_yaw_link.STL │ │ │ │ ├── left_knee_link.STL │ │ │ │ ├── left_rubber_hand.STL │ │ │ │ ├── left_shoulder_pitch_link.STL │ │ │ │ ├── left_shoulder_roll_link.STL │ │ │ │ ├── left_shoulder_yaw_link.STL │ │ │ │ ├── left_wrist_pitch_link.STL │ │ │ │ ├── left_wrist_roll_link.STL │ │ │ │ ├── left_wrist_roll_rubber_hand.STL │ │ │ │ ├── left_wrist_yaw_link.STL │ │ │ │ ├── logo_link.STL │ │ │ │ ├── pelvis.STL │ │ │ │ ├── pelvis_contour_link.STL │ │ │ │ ├── right_ankle_pitch_link.STL │ │ │ │ ├── right_ankle_roll_link.STL │ │ │ │ ├── right_elbow_link.STL │ │ │ │ ├── right_hand_index_0_link.STL │ │ │ │ ├── right_hand_index_1_link.STL │ │ │ │ ├── right_hand_middle_0_link.STL │ │ │ │ ├── right_hand_middle_1_link.STL │ │ │ │ ├── right_hand_palm_link.STL │ │ │ │ ├── right_hand_thumb_0_link.STL │ │ │ │ ├── right_hand_thumb_1_link.STL │ │ │ │ ├── right_hand_thumb_2_link.STL │ │ │ │ ├── right_hip_pitch_link.STL │ │ │ │ ├── right_hip_roll_link.STL │ │ │ │ ├── right_hip_yaw_link.STL │ │ │ │ ├── right_knee_link.STL │ │ │ │ ├── right_rubber_hand.STL │ │ │ │ ├── right_shoulder_pitch_link.STL │ │ │ │ ├── right_shoulder_roll_link.STL │ │ │ │ ├── right_shoulder_yaw_link.STL │ │ │ │ ├── right_wrist_pitch_link.STL │ │ │ │ ├── right_wrist_roll_link.STL │ │ │ │ ├── right_wrist_roll_rubber_hand.STL │ │ │ │ ├── right_wrist_yaw_link.STL │ │ │ │ ├── torso_constraint_L_link.STL │ │ │ │ ├── torso_constraint_L_rod_link.STL │ │ │ │ ├── torso_constraint_R_link.STL │ │ │ │ ├── torso_constraint_R_rod_link.STL │ │ │ │ ├── torso_link.STL │ │ │ │ ├── torso_link_23dof_rev_1_0.STL │ │ │ │ ├── torso_link_rev_1_0.STL │ │ │ │ ├── waist_constraint_L.STL │ │ │ │ ├── waist_constraint_R.STL │ │ │ │ ├── waist_roll_link.STL │ │ │ │ ├── waist_roll_link_rev_1_0.STL │ │ │ │ ├── waist_support_link.STL │ │ │ │ ├── waist_yaw_link.STL │ │ │ │ └── 
waist_yaw_link_rev_1_0.STL │ │ │ ├── rest_pose_local_rot.p │ │ │ └── xml/ │ │ │ └── g1.xml │ │ ├── smplx22/ │ │ │ ├── beta.npy │ │ │ ├── joints.p │ │ │ └── mean_hands.npy │ │ ├── somaskel30/ │ │ │ ├── joints.p │ │ │ └── soma_base_fit_mhr_params.npz │ │ └── somaskel77/ │ │ ├── bvh_joints.p │ │ ├── joints.p │ │ ├── relaxed_hands_rest_pose.npy │ │ ├── skin_standard.npz │ │ ├── somaskel77_standard_tpose.bvh │ │ └── standard_t_pose_global_offsets_rots.p │ ├── assets.py │ ├── constraints.py │ ├── demo/ │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── app.py │ │ ├── config.py │ │ ├── embedding_cache.py │ │ ├── generation.py │ │ ├── queue_manager.py │ │ ├── state.py │ │ └── ui.py │ ├── exports/ │ │ ├── __init__.py │ │ ├── bvh.py │ │ ├── motion_convert_lib.py │ │ ├── motion_formats.py │ │ ├── motion_io.py │ │ ├── mujoco.py │ │ └── smplx.py │ ├── geometry.py │ ├── meta.py │ ├── metrics/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── constraints.py │ │ ├── foot_skate.py │ │ └── tmr.py │ ├── model/ │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── cfg.py │ │ ├── common.py │ │ ├── diffusion.py │ │ ├── kimodo_model.py │ │ ├── llm2vec/ │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── llm2vec.py │ │ │ ├── llm2vec_wrapper.py │ │ │ └── models/ │ │ │ ├── __init__.py │ │ │ ├── attn_mask_utils.py │ │ │ ├── bidirectional_llama.py │ │ │ └── utils.py │ │ ├── load_model.py │ │ ├── loading.py │ │ ├── registry.py │ │ ├── text_encoder_api.py │ │ ├── tmr.py │ │ └── twostage_denoiser.py │ ├── motion_rep/ │ │ ├── __init__.py │ │ ├── conditioning.py │ │ ├── feature_utils.py │ │ ├── feet.py │ │ ├── reps/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── kimodo_motionrep.py │ │ │ └── tmr_motionrep.py │ │ ├── smooth_root.py │ │ └── stats.py │ ├── postprocess.py │ ├── sanitize.py │ ├── scripts/ │ │ ├── __init__.py │ │ ├── docker-entrypoint.sh │ │ ├── generate.py │ │ ├── gradio_theme.py │ │ ├── lock_requirements.py │ │ ├── motion_convert.py │ │ ├── mujoco_load.py │ │ └── run_text_encoder_server.py │ ├── skeleton/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bvh.py │ │ ├── definitions.py │ │ ├── kinematics.py │ │ ├── registry.py │ │ └── transforms.py │ ├── tools.py │ └── viz/ │ ├── __init__.py │ ├── constraint_ui.py │ ├── coords.py │ ├── g1_rig.py │ ├── gui.py │ ├── playback.py │ ├── scene.py │ ├── smplx_skin.py │ ├── soma_layer_skin.py │ ├── soma_skin.py │ └── viser_utils.py ├── pyproject.toml └── setup.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # debugging files debug/ SMPLX_NEUTRAL.npz # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so datasets # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# vscode
.vscode
*.code-workspace
/pyrightconfig.json

wandb/

# others
out
tmr_out
.ruff_cache
outputs
/debug
/batch*.sh
checkpoints/**/test/*
nohup.out
*.swp
*.swo
*.txt~*
*.un~
*~
train_done
.aider*
onelogger.err

# deploy files
/helm-library

================================================
FILE: .pre-commit-config.yaml
================================================
repos:
  # code formatting
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.6.4
    hooks:
      - id: ruff
        name: sort imports with ruff
        args: [--select, I, --fix]
      - id: ruff-format
        name: format with ruff
  # docstring formatting
  - repo: https://github.com/PyCQA/docformatter
    rev: v1.7.7
    hooks:
      - id: docformatter
        args:
          [
            --in-place,
            --wrap-summaries=100,
            --wrap-descriptions=100,
            --style=sphinx,
          ]
  # yaml formatting
  - repo: https://github.com/pre-commit/mirrors-prettier
    rev: v3.0.0-alpha.6
    hooks:
      - id: prettier
        types: [yaml]
        exclude: |
          (?x)^(
            environment\.yaml$
            | \.gitlab-ci\.yml$
            | \.k8s/.*\.(ya?ml)$
          )
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.0.1
    hooks:
      - id: trailing-whitespace # Trims trailing whitespace.
      - id: end-of-file-fixer # Makes sure files end in a newline and only a newline.
      - id: check-yaml # Attempts to load all yaml files to verify syntax.
        exclude: |
          (?x)^(
            \.gitlab-ci\.yml$
            | \.k8s/.*\.(ya?ml)$
          )

exclude: "checkpoints/.*"

================================================
FILE: ATTRIBUTIONS.MD
================================================
LLM2Vec
MIT License
https://github.com/McGill-NLP/llm2vec

Copyright (c) 2024 McGill NLP

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Unitree mujoco
BSD 3-Clause License
https://github.com/unitreerobotics/unitree_mujoco/blob/main/LICENSE

Copyright (c) 2016-2024 HangZhou YuShu TECHNOLOGY CO.,LTD. ("Unitree Robotics")
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

================================================
FILE: CHANGELOG.md
================================================
# Changelog

All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [2026-05-03]

### Fixed

- `benchmark/parse_folder.py` now averages each metric only over the testcases that actually report it. Previously, sparse constraint metrics (`constraint_root2d_acc`, `constraint_root2d_err`, `constraint_root2d_err_p95`, `constraint_fullbody_keyframe`, `constraint_end_effector`) were divided by the total motion count of the (split, category), including testcases of other constraint kinds that did not report the metric. This silently scaled values by `motions_with_metric / total_motions` (e.g. `constraint_root2d_acc` displayed as ~0.57 when the true value was ~0.93). Both the printed table and `summary_rows.json` are affected, including the combined constraints row that merges `constraints_withtext` and `constraints_notext`. Text-following metrics, foot-skate/contact metrics, and TMR metrics are unchanged.
- Updated Kimodo benchmark results in the documentation with this fix applied.

## [2026-04-24]

### Added

- Support for `TEXT_ENCODER_DEVICE` environment variable to force LLM2Vec to use the CPU instead of GPU. Setting `TEXT_ENCODER_DEVICE=cpu` reduces VRAM usage to <3 GB with a fairly small speed hit.
- `--save_example_dir` argument to `kimodo_gen` to save outputs to an example directory that can be directly loaded into `kimodo_demo`

### Fixed

- Bug in post-processing that was incorrectly making the smoothed root the target for the root in full-body constraints
- Modified how transitions are handled in multi-prompt generation to improve smoothness

### Removed

- `share_transition` and `percentage_transition_override` options from python API for multi-prompt generation

## [2026-04-13]

### Added

- Option `--bvh_standard_tpose` to use standard T-pose for BVH file saved from `generate.py`
- Option to use standard T-pose for BVH file saved or downloaded from demo
- Option to input/output BVH files that use standard T-pose with `motion_convert.py`
- Added BVH file containing the standard Kimodo T-pose to `kimodo/assets/skeletons/somaskel77/somaskel77_standard_tpose.bvh`
- Updated documentation with these new options

## [2026-04-10]

### Added

- [Kimodo-SOMA-RP-v1.1](https://huggingface.co/nvidia/Kimodo-SOMA-RP-v1.1) and [Kimodo-SOMA-SEED-v1.1](https://huggingface.co/nvidia/Kimodo-SOMA-SEED-v1.1) models and added support in the codebase. If not specified, the latest version of the models will be used automatically with the demo and CLI.
- [Kimodo Motion Generation Benchmark](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark) for standardized evaluation of motion generation models trained on the BONES-SEED dataset.
- Scripts to construct the full benchmark, generate motions for test cases, and compute evaluation metrics.
- Documentation explaining the benchmark and how to use the evaluation pipeline.
- [TMR-SOMA-RP-v1](https://huggingface.co/nvidia/TMR-SOMA-RP-v1) motion-text embedding model to be used for evaluation metrics.
- Added option to load LLM2Vec text encoder in fp32 precision.

### Fixed

- Always use batch size 1 with LLM2Vec to avoid unexpected behavior of different embeddings based on batch size.
- Load LLM2Vec directly onto the GPU, if available.
- Updated documentation on constraints with more details.

## [2026-04-01]

### Fixed

- Fix unnecessary text encoder reload when switching between models in the interactive demo (if not using the text encoder server API).

## [2026-03-31]

### Added

- New `kimodo_convert` CLI tool for converting generated motions between formats (NPZ, BVH, MuJoCo CSV, AMASS NPZ).
- Support for loading and saving BVH, CSV, and NPZ motion files in the interactive demo.

## [2026-03-27]

### Fixed

- Bug fix for foot contact visualization in the interactive demo.
- Patch bug with BVH export for SOMA models.

## [2026-03-19]

### Changed

- **Breaking:** Model inputs/outputs now use the SOMA 77-joint skeleton (`somaskel77`). This affects saved motion formats and constraint files from previous versions.

### Added

- Released timeline annotations for the BONES-SEED dataset on HuggingFace.

## [2026-03-16] - Initial Release

### Added

- Open-source release of Kimodo codebase under Apache-2.0 license.
- Five model variants: Kimodo-SOMA-RP-v1, Kimodo-G1-RP-v1, Kimodo-SOMA-SEED-v1, Kimodo-G1-SEED-v1, Kimodo-SMPLX-RP-v1.
- Command-line interface (`kimodo_gen`) for motion generation with text prompts and kinematic constraints.
- Interactive web-based motion authoring demo (`kimodo_demo`) with timeline editor, constraint tracks, and 3D visualization.
- Support for multiple output formats: default NPZ, MuJoCo qpos CSV (G1), AMASS NPZ (SMPL-X).
- Documentation site with quick start guide, installation instructions, CLI reference, and API docs.
- Compatibility with downstream tools: ProtoMotions (physics-based policy training) and GMR (motion retargeting).

================================================
FILE: CONTRIBUTING.MD
================================================
# How to Contribute

## Code Reviews

All submissions require review. We use GitHub pull requests for this purpose. Consult [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests.

## Signing Your Work

* We require that all contributors "sign-off" on their commits. This certifies that the contribution is your original work, or you have rights to submit it under the same license, or a compatible license.

* Any contribution which contains commits that are not Signed-Off will not be accepted.

* To sign off on a commit you simply use the `--signoff` (or `-s`) option when committing your changes:

```bash
$ git commit -s -m "Add cool feature."
```

This will append the following to your commit message:

```
Signed-off-by: Your Name <your@email.com>
```

* Full text of the DCO:

```
Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
1 Letterman Drive
Suite D4700
San Francisco, CA, 94129

Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
```

```
Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or

(b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or

(c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it.

(d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved.
```

================================================
FILE: Dockerfile
================================================
FROM nvcr.io/nvidia/pytorch:24.10-py3

# Avoid some interactive prompts + make pip quieter/reproducible-ish
ENV DEBIAN_FRONTEND=noninteractive \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Where your code will live inside the container
WORKDIR /workspace

# System deps
RUN apt-get update && apt-get install -y --no-install-recommends \
    git curl ca-certificates \
    cmake build-essential \
    gosu \
    && rm -rf /var/lib/apt/lists/*

# Some base images ship a broken `/usr/local/bin/cmake` shim (from a partial pip install),
# which shadows `/usr/bin/cmake` and breaks builds that invoke `cmake` (e.g. MotionCorrection).
# Prefer the system cmake.
RUN rm -f /usr/local/bin/cmake || true

# Install from docker_requirements.txt: kimodo editable (-e .),
# but MotionCorrection non-editable (./MotionCorrection). The -e . line ensures [project.scripts]
# from pyproject.toml are installed (kimodo_gen, kimodo_demo, kimodo_textencoder).
# SKIP_MOTION_CORRECTION_IN_SETUP=1 so setup.py does not bundle motion_correction; it is
# installed separately from ./MotionCorrection in the requirements file (non-editable).
COPY docker_requirements.txt /workspace/docker_requirements.txt
COPY setup.py /workspace/setup.py
COPY pyproject.toml /workspace/pyproject.toml
COPY kimodo /workspace/kimodo
COPY kimodo-viser /workspace/kimodo-viser
COPY MotionCorrection /workspace/MotionCorrection

RUN --mount=type=cache,target=/root/.cache/pip \
    python -m pip install --upgrade pip \
    && SKIP_MOTION_CORRECTION_IN_SETUP=1 python -m pip install -r docker_requirements.txt

# Use the docker-entrypoint script, to allow the docker to run as the actual user instead of root
COPY kimodo/scripts/docker-entrypoint.sh /usr/local/bin/docker-entrypoint
RUN chmod +x /usr/local/bin/docker-entrypoint

# Default command (change to your entrypoint if you have one)
ENTRYPOINT ["docker-entrypoint"]
CMD ["bash"]

================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.

"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner.
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) 
The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.

Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

================================================
FILE: MANIFEST.in
================================================
include setup.py
recursive-include kimodo/assets *
recursive-include MotionCorrection/src *.cpp *.h *.inl
recursive-include MotionCorrection/python *.py *.dll
include MotionCorrection/CMakeLists.txt
include MotionCorrection/test_example.py

================================================
FILE: MotionCorrection/.gitignore
================================================
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
*.egg
*.egg-info/
dist/
build/
*.whl
.Python
develop-eggs/
.installed.cfg
pip-log.txt
pip-delete-this-directory.txt
.pytest_cache/
.coverage
htmlcov/
.tox/
.venv
venv/
ENV/
env/

# C/C++
*.o
*.obj
*.exe
*.out
*.app
*.dll
*.dylib
*.lib
*.a
*.la
*.lo
*.slo
*.ko
*.elf
*.ilk
*.map
*.exp
*.gch
*.pch
*.idb
*.pdb
*.mod
*.smod
*.lai

# CMake
CMakeCache.txt
CMakeFiles/
CMakeScripts/
cmake_install.cmake
install_manifest.txt
CTestTestfile.cmake
_deps/
cmake-build-*/
CMakeUserPresets.json

# IDE
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store
*.iml
.project
.cproject
.settings/

# Visual Studio
.vs/
*.user
*.suo
*.userosscache
*.sln.docstates
*.VC.db
*.VC.opendb

# Build directories
build/
Build/
out/
dist/
temp/

# Logs
*.log

================================================
FILE: MotionCorrection/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.15)
project(motion_correction)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Find Python
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)

# Find or fetch pybind11
find_package(pybind11 CONFIG QUIET)
if(NOT pybind11_FOUND)
    message(STATUS "pybind11 not found, fetching from GitHub...")
    include(FetchContent)
    FetchContent_Declare(
        pybind11
        GIT_REPOSITORY https://github.com/pybind/pybind11.git
        GIT_TAG v2.11.1
    )
    FetchContent_MakeAvailable(pybind11)
endif()

# Find or fetch Eigen
find_package(Eigen3 3.3 CONFIG QUIET)
if(NOT Eigen3_FOUND)
    message(STATUS "Eigen3 not found, fetching from GitLab...")
    include(FetchContent)
    FetchContent_Declare(
        Eigen
        GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
        GIT_TAG 3.4.0
    )
    set(EIGEN_BUILD_DOC OFF CACHE BOOL "" FORCE)
    set(BUILD_TESTING OFF CACHE BOOL "" FORCE)
    set(EIGEN_BUILD_PKGCONFIG OFF CACHE BOOL "" FORCE)
    FetchContent_MakeAvailable(Eigen)
endif()

# Source files
set(MATH_SOURCES
    src/cpp/Math/Matrix.cpp
    src/cpp/Math/Quaternion.cpp
    src/cpp/Math/Transform.cpp
    src/cpp/Math/Types.cpp
    src/cpp/Math/Vector.cpp
)
set(ANIM_SOURCES
    src/cpp/AnimProcessing/InverseKinematics.cpp
    src/cpp/AnimProcessing/TrajectoryCorrector.cpp
    src/cpp/AnimProcessing/Utility.cpp
)

# Create static library for the core functionality
add_library(motion_correction_cpp_base STATIC ${MATH_SOURCES} ${ANIM_SOURCES})

# Enable Position Independent Code (required for linking into shared library)
set_target_properties(motion_correction_cpp_base PROPERTIES POSITION_INDEPENDENT_CODE ON)

target_include_directories(motion_correction_cpp_base PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp
)

if(TARGET Eigen3::Eigen)
    target_link_libraries(motion_correction_cpp_base PUBLIC Eigen3::Eigen)
else()
    target_link_libraries(motion_correction_cpp_base PUBLIC eigen)
endif()

target_compile_definitions(motion_correction_cpp_base PUBLIC EIGEN_MPL2_ONLY)

# Compiler-specific settings
if(MSVC)
    # MSVC-specific flags
    target_compile_options(motion_correction_cpp_base PRIVATE /W4 /arch:AVX)
else()
    # GCC/Clang flags (also applies to MinGW on Windows)
    # Enable SSE4.1 and AVX instructions for SIMD operations
    target_compile_options(motion_correction_cpp_base PRIVATE -Wall -Wextra -msse4.1 -mavx)
endif()

# Python bindings
pybind11_add_module(_motion_correction src/cpp/BindingsPython.cpp)
target_link_libraries(_motion_correction PRIVATE motion_correction_cpp_base)
target_include_directories(_motion_correction PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp
)

# Install the Python module
install(TARGETS _motion_correction LIBRARY DESTINATION python/motion_correction)
install(FILES python/motion_correction/__init__.py DESTINATION python/motion_correction)
install(FILES python/motion_correction/motion_postprocess.py DESTINATION python/motion_correction)

================================================
FILE: MotionCorrection/MANIFEST.in
================================================
include CMakeLists.txt
include test_example.py
recursive-include src *.cpp *.h *.inl
recursive-include python *.py *.dll

================================================
FILE: MotionCorrection/README.md
================================================
# motion_correction

Standalone `correct_motion` implementation, packaged as a small C++ motion trajectory correction library with Python bindings.

## Installation Guide

### Prerequisites

Ensure you have a C++17-compatible compiler (GCC 7.0+, Clang 5.0+, or MSVC 2017+) and CMake 3.15+. On Windows, install MinGW-w64 or Visual Studio with C++ tools. On Linux, install `build-essential` and `cmake`.

This project will download and install additional third-party open source software projects. Review the license terms of these open source projects before use.

### Build & Install

#### Standard Installation

```bash
pip install .
```

#### Development Installation

```bash
pip install -e .
```

### Verify Installation

```python
import motion_correction
print("Installation successful!")
```

You can also run `python run_test.py` for a simple test.

================================================
FILE: MotionCorrection/python/motion_correction/__init__.py
================================================
from ._motion_correction import *
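A slightly fuller install check than the README's one-liner may be useful here. The sketch below relies only on the package layout shown above: `__init__.py` re-exports the compiled `_motion_correction` module, and `motion_postprocess.py` holds the torch wrapper.

```python
# Smoke test for a motion_correction install (sketch; run after `pip install .`).
import motion_correction

# `from ._motion_correction import *` in __init__.py exposes the C++ entry
# point at package level.
assert hasattr(motion_correction, "correct_motion"), "compiled extension missing"

# The higher-level torch wrapper is a plain-Python submodule.
from motion_correction.motion_postprocess import correct_motion  # noqa: F401

print("motion_correction import OK")
```

For an end-to-end exercise of the call path on a synthetic 12-joint rig, see `run_test.py` further down.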
================================================
FILE: MotionCorrection/python/motion_correction/motion_postprocess.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import os
import pickle

import numpy as np
import torch

import motion_correction


def correct_motion(
    hipTranslations,
    jointRotations,
    contacts,
    hipTranslationsInput,
    rotationsInput,
    constraint_masks,
    contact_threshold,
    root_margin,
    working_rig,
    has_double_ankle_joints=False,
):
    # Flatten the rig description into the parallel arrays the C++ binding expects.
    joint_names = [x.name for x in working_rig]
    joint_parents = [
        joint_names.index(working_rig[i].parent) if working_rig[i].parent in joint_names else -1
        for i in range(len(working_rig))
    ]
    joint_ref_translations = [list(x.t_pose_translation) for x in working_rig]
    joint_ref_rotations = [list(x.t_pose_rotation) for x in working_rig]

    # Each end-effector tag must appear on exactly one joint.
    left_hand_idx = [i for i in range(len(joint_names)) if working_rig[i].retarget_tag == "LeftHand"]
    if len(left_hand_idx) != 1:
        raise RuntimeError("correct_motion: Expected exactly one joint with LeftHand tag")
    left_hand_idx = left_hand_idx[0]

    right_hand_idx = [i for i in range(len(joint_names)) if working_rig[i].retarget_tag == "RightHand"]
    if len(right_hand_idx) != 1:
        raise RuntimeError("correct_motion: Expected exactly one joint with RightHand tag")
    right_hand_idx = right_hand_idx[0]

    left_foot_idx = [i for i in range(len(joint_names)) if working_rig[i].retarget_tag == "LeftFoot"]
    if len(left_foot_idx) != 1:
        raise RuntimeError("correct_motion: Expected exactly one joint with LeftFoot tag")
    left_foot_idx = left_foot_idx[0]

    right_foot_idx = [i for i in range(len(joint_names)) if working_rig[i].retarget_tag == "RightFoot"]
    if len(right_foot_idx) != 1:
        raise RuntimeError("correct_motion: Expected exactly one joint with RightFoot tag")
    right_foot_idx = right_foot_idx[0]

    end_frame = hipTranslations.shape[1]
    default_mask = torch.zeros(hipTranslations.shape[1], dtype=torch.float32)
    root_mask = constraint_masks.get("Root", default_mask)
    full_body_mask = constraint_masks.get("FullBody", default_mask)
    left_hand_mask = constraint_masks.get("LeftHand", default_mask)
    right_hand_mask = constraint_masks.get("RightHand", default_mask)
    left_foot_mask = constraint_masks.get("LeftFoot", default_mask)
    right_foot_mask = constraint_masks.get("RightFoot", default_mask)

    batch_size = hipTranslations.shape[0]
    for b in range(batch_size):
        # The C++ side works on flat float32 buffers and corrects them in place.
        hipTranslationsCorrected = hipTranslations[b, :end_frame].detach().cpu().flatten().numpy().astype(np.float32)
        rotationsCorrected = jointRotations[b, :end_frame].detach().cpu().flatten().numpy().astype(np.float32)
        hipTranslationsInput_flat = hipTranslationsInput.detach().cpu().flatten().numpy().astype(np.float32)
        rotationsInput_flat = rotationsInput.detach().cpu().flatten().numpy().astype(np.float32)
        ctcs = contacts[b].detach().cpu().flatten().numpy().astype(np.float32)

        motion_correction.correct_motion(
            hipTranslationsCorrected,
            rotationsCorrected,
            hipTranslationsInput_flat,
            rotationsInput_flat,
            full_body_mask,
            left_hand_mask,
            right_hand_mask,
            left_foot_mask,
            right_foot_mask,
            root_mask,
            np.array(ctcs, dtype=np.float32),
            joint_parents,
            joint_ref_translations,
            joint_ref_rotations,
            left_hand_idx,
            right_hand_idx,
            left_foot_idx,
            right_foot_idx,
            contact_threshold,
            root_margin,
            has_double_ankle_joints,
        )

        # Copy the corrected buffers back into the original tensors.
        hipTranslations[b, :end_frame] = torch.from_numpy(
            hipTranslationsCorrected.reshape(*hipTranslations[b, :end_frame].shape)
        )
        jointRotations[b, :end_frame] = torch.from_numpy(
            rotationsCorrected.reshape(*jointRotations[b, :end_frame].shape)
        )
================================================
FILE: MotionCorrection/run_test.py
================================================
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import torch

from motion_correction.motion_postprocess import correct_motion


class Joint:
    def __init__(self, name, parent, t_pose_translation, t_pose_rotation, retarget_tag=""):
        self.name = name
        self.parent = parent
        self.t_pose_translation = t_pose_translation
        self.t_pose_rotation = t_pose_rotation
        self.retarget_tag = retarget_tag


def create_test_rig():
    return [
        Joint("Hips", None, [0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0], "Root"),
        Joint("Spine", "Hips", [0.0, 0.1, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("LeftUpLeg", "Hips", [-0.1, -0.05, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("LeftLeg", "LeftUpLeg", [0.0, -0.4, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("LeftFoot", "LeftLeg", [0.0, -0.4, 0.0], [0.0, 0.0, 0.0, 1.0], "LeftFoot"),
        Joint("RightUpLeg", "Hips", [0.1, -0.05, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("RightLeg", "RightUpLeg", [0.0, -0.4, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("RightFoot", "RightLeg", [0.0, -0.4, 0.0], [0.0, 0.0, 0.0, 1.0], "RightFoot"),
        Joint("LeftArm", "Spine", [-0.3, 0.3, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("LeftHand", "LeftArm", [-0.3, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0], "LeftHand"),
        Joint("RightArm", "Spine", [0.3, 0.3, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("RightHand", "RightArm", [0.3, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0], "RightHand"),
    ]


if __name__ == "__main__":
    # Test data
    batch_size, num_frames, num_joints = 1, 60, 12
    hipTranslations = torch.randn(batch_size, num_frames, 3)
    jointRotations = torch.randn(batch_size, num_frames, num_joints, 4)
    jointRotations = jointRotations / jointRotations.norm(dim=-1, keepdim=True)
    contacts = torch.rand(batch_size, num_frames, 4)
    hipTranslationsInput = hipTranslations.clone()
    rotationsInput = jointRotations.clone()
    constraint_masks = {
        "Root": torch.zeros(num_frames),
        "FullBody": torch.zeros(num_frames),
        "LeftHand": torch.zeros(num_frames),
        "RightHand": torch.zeros(num_frames),
        "LeftFoot": torch.zeros(num_frames),
        "RightFoot": torch.zeros(num_frames),
    }
    working_rig = create_test_rig()

    # Run correction
    correct_motion(
        hipTranslations=hipTranslations,
        jointRotations=jointRotations,
        contacts=contacts,
        hipTranslationsInput=hipTranslationsInput,
        rotationsInput=rotationsInput,
        constraint_masks=constraint_masks,
        contact_threshold=0.5,
        root_margin=0.01,
        working_rig=working_rig,
    )
    print("Test completed successfully")
================================================
FILE: MotionCorrection/setup.py
================================================
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Setup script for the correct_motion standalone package."""

import os
import shutil
import subprocess
import sys
from pathlib import Path

from setuptools import Extension, setup
from setuptools.command.build_ext import build_ext


class CMakeExtension(Extension):
    def __init__(self, name, sourcedir=""):
        Extension.__init__(self, name, sources=[])
        self.sourcedir = os.path.abspath(sourcedir)


class CMakeBuild(build_ext):
    def run(self):
        try:
            subprocess.check_output(["cmake", "--version"])
        except OSError:
            raise RuntimeError("CMake must be installed to build this package")
        for ext in self.extensions:
            self.build_extension(ext)

    def build_extension(self, ext):
        # import pdb; pdb.set_trace()  # Debug build process
        extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
        cmake_args = [
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}",
            f"-DPYTHON_EXECUTABLE={sys.executable}",
        ]
        cfg = "Debug" if self.debug else "Release"
        build_args = ["--config", cfg]
        cmake_args += [f"-DCMAKE_BUILD_TYPE={cfg}"]

        use_mingw = False
        mingw_bin = None
        if sys.platform == "win32":
            generator = os.environ.get("CMAKE_GENERATOR", "")
            if generator:
                cmake_args = ["-G", generator] + cmake_args
                if "mingw" in generator.lower():
                    use_mingw = True
                else:
                    cmake_args += [f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"]
            else:
                # Try MinGW Makefiles as default on Windows
                try:
                    subprocess.check_output(["g++", "--version"], stderr=subprocess.STDOUT)
                    use_mingw = True
                    cmake_args = ["-G", "MinGW Makefiles"] + cmake_args
                    build_args = []  # MinGW Makefiles do not accept --config
                except (OSError, subprocess.CalledProcessError):
                    # If g++ is not found, let CMake use its default (Visual Studio)
                    cmake_args += [f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"]
            if use_mingw:
                gxx_path = shutil.which("g++")
                if gxx_path:
                    mingw_bin = Path(gxx_path).parent
        else:
            build_args += ["--", "-j4"]

        env = os.environ.copy()
        env["CXXFLAGS"] = f'{env.get("CXXFLAGS", "")} -DVERSION_INFO=\\"{self.distribution.get_version()}\\"'
        if not os.path.exists(self.build_temp):
            os.makedirs(self.build_temp)
        subprocess.check_call(["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
        subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=self.build_temp)

        if use_mingw and mingw_bin is not None:
            # Ship the MinGW runtime DLLs next to the extension so it can be
            # imported without MinGW's bin directory on PATH.
            runtime_libs = [
                "libstdc++-6.dll",
                "libgcc_s_seh-1.dll",
                "libwinpthread-1.dll",
            ]
            extdir_path = Path(extdir)
            extdir_path.mkdir(parents=True, exist_ok=True)
            for lib_name in runtime_libs:
                src_path = mingw_bin / lib_name
                if src_path.exists():
                    shutil.copy2(src_path, extdir_path / lib_name)
                else:
                    self.announce(
                        f"Warning: Expected MinGW runtime DLL '{lib_name}' not found next to g++ "
                        f"(looked in {mingw_bin}). "
                        "The built extension may fail to import if the DLL is not on PATH.",
                        level=3,
                    )


setup(
    name="motion_correction",
    version="1.0.0",
    author="NVIDIA",
    description="Standalone correct_motion function",
    long_description="",
    packages=["motion_correction"],
    package_dir={"": "python"},
    ext_modules=[CMakeExtension("motion_correction._motion_correction")],
    cmdclass={"build_ext": CMakeBuild},
    zip_safe=False,
    python_requires=">=3.8",
    install_requires=[
        "torch>=1.10.0",
        "numpy>=1.19.0",
        # 'cmake'  # can be installed via pip if the Windows system lacks it,
        # but it must be installed before this build runs, not listed here.
    ],
)
================================================
FILE: MotionCorrection/src/cpp/AnimProcessing/InverseKinematics.cpp
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

#include "InverseKinematics.h"
#include "Math/Scalar.h"

#include <cmath>

using namespace IK;

namespace
{
    float getAngleWithTwoSideVectors(const Math::Vector& vecLeft, const Math::Vector& vecRight)
    {
        auto lNorm = vecLeft.GetNormalized3();
        auto rNorm = vecRight.GetNormalized3();
        float cosine = lNorm.GetDot3(rNorm);
        float sine = lNorm.Cross3(rNorm).GetLength3();
        return atan2f(sine, cosine); // in radians
    }

    float getAngleWithCosineRule(const float lSideLeft, const float lSideRight, const float lSideAcross)
    {
        float val = (lSideRight * lSideRight + lSideLeft * lSideLeft - lSideAcross * lSideAcross) /
                    (2.0f * lSideLeft * lSideRight);
        val = Math::Clamp(val, -1.0f, 1.0f); // numerical stability; also rejects impossible triangles
        return acosf(val); // in radians
    }
}
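// For reference, the identities the two helpers above implement (a restatement
// of the code, not additional behavior):
//
//     theta(u, v) = atan2(|u_hat x v_hat|, u_hat . v_hat)    [getAngleWithTwoSideVectors]
//     gamma       = acos((a^2 + b^2 - c^2) / (2 a b))        [getAngleWithCosineRule]
//
// with a = lSideLeft, b = lSideRight, c = lSideAcross; gamma is the interior
// angle between the two adjacent sides, and the acos argument is clamped to
// [-1, 1] so slightly inconsistent side lengths cannot produce NaNs. The atan2
// form stays accurate near 0 and pi, where acos of a dot product loses precision.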
void IK::TwoBoneIk(
    Pose& pose,
    const Math::Transform& rootTransform,
    uint32_t cIdx,
    float weight,
    const Math::Vector& target,
    const std::vector<int32_t>& joint_parents_vec,
    const Math::Vector& hintOffset
)
{
    weight = Math::Clamp(weight, 0.0f, 1.0f);
    if (!(weight > 0.0f))
        return;

    // Two bone IK: joints are represented as "a", "b", "c" in the below comments:
    // 1. stage 1: bend joints a and b so that |ac| = |at|, while vec_ac maintains the same direction
    // 2. stage 2: rotate start joint a so that c and t end up in the same place
    //
    //    a                 a                 a
    //    |\                |\                |\
    //    | \               | \               | \
    //    |  \ (stage 1 ->) |  \ (stage 2 ->) |  \
    //    |   b             |   b             |   b
    //    |    \            |   |             |  /
    //    |     \           |   |             | /
    //    t      c          t   c             t(c)
    //
    // (a is the root joint, b is the middle joint and c is the end joint)

    int32_t bIdx = joint_parents_vec[cIdx];
    if (bIdx < 0)
    {
        return;
    }
    int32_t aIdx = joint_parents_vec[bIdx];
    if (aIdx < 0)
    {
        return;
    }

    // Find the parent world transform of joint a:
    Math::Transform aParentWorldTransform = Math::Transform::Identity;
    int32_t idx = joint_parents_vec[aIdx];
    while (idx >= 0)
    {
        aParentWorldTransform = aParentWorldTransform * pose[idx];
        idx = joint_parents_vec[idx];
    }
    aParentWorldTransform = aParentWorldTransform * rootTransform;

    // Compute world space transforms of a, b and c:
    Math::Transform aWorld = pose[aIdx] * aParentWorldTransform;
    Math::Transform bWorld = pose[bIdx] * aWorld;
    Math::Transform cWorld = pose[cIdx] * bWorld;

    auto a = aWorld.GetTranslation();
    auto b = bWorld.GetTranslation();
    auto c = cWorld.GetTranslation();
    auto t = Math::Vector::Lerp(c, target, weight);

    // step 1 (stage 1): extend / contract the joint chain to match the distance
    float eps = 0.0001f; // numerical stability
    float l_ab = (b - a).Length3().GetX();
    float l_bc = (c - b).Length3().GetX();
    float l_at = (a - t).Length3().GetX();
    l_at = Math::Clamp(l_at, eps, (l_ab + l_bc) * 0.999f); // when not reachable, replace with maximum reachable length

    // get the current angles
    float theta_bac_current = getAngleWithTwoSideVectors(a - b, a - c);
    float theta_abc_current = getAngleWithTwoSideVectors(b - a, b - c);

    // get the desired angles
    if (l_ab < eps || l_bc < eps || l_at < eps)
    {
        return; // the length is too small; rejecting potentially numerically unstable requests
    }
    float theta_bac_desired = getAngleWithCosineRule(l_ab, l_at, l_bc);
    float theta_abc_desired = getAngleWithCosineRule(l_ab, l_bc, l_at);

    // in joint[0]'s parent's space
    Math::Vector rotationAxis = Math::Vector::Cross3(c - a, bWorld.TransformPoint(hintOffset) - a);
    float l = rotationAxis.GetLength3();
    if (l == 0)
    {
        rotationAxis = Math::Vector(0, 0, 1);
    }
    else
    {
        rotationAxis /= l;
    }

    // get the rotation with axis in the local space of joint a and joint b
    Math::Vector rotationAxisLocalInBSpace = bWorld.GetRotation().RotateVectorInverse(rotationAxis);
    Math::Transform rotateInB(
        Math::Quaternion(rotationAxisLocalInBSpace, (theta_abc_desired - theta_abc_current)),
        Math::Vector::Zero);
    pose[bIdx] = rotateInB * pose[bIdx];

    Math::Vector rotationAxisLocalInASpace = aWorld.GetRotation().RotateVectorInverse(rotationAxis);
    Math::Transform rotateInA(
        Math::Quaternion(rotationAxisLocalInASpace, (theta_bac_desired - theta_bac_current)),
        Math::Vector::Zero);
    pose[aIdx] = rotateInA * pose[aIdx];

    // recompute a's world space transform as we're going to need it:
    aWorld = pose[aIdx] * aParentWorldTransform;

    // step 2 (stage 2): rotate joint a so that the target and the end joint c match
    auto acLocal = aWorld.GetRotation().RotateVectorInverse(c - a);
    auto atLocal = aWorld.GetRotation().RotateVectorInverse(target - a);
    Math::Transform rotateStageTwo(
        Math::Quaternion::FromRotationBetweenVectors(acLocal, atLocal),
        Math::Vector::Zero
    );
    pose[aIdx] = rotateStageTwo * pose[aIdx];
}

void IK::OneBoneIk(
    Pose& pose,
    const Math::Transform& rootTransform,
    uint32_t bIdx,
    float weight,
    const Math::Vector& target,
    const std::vector<int32_t>& joint_parents_vec
)
{
    weight = Math::Clamp(weight, 0.0f, 1.0f);
    if (!(weight > 0.0f))
        return;

    int32_t aIdx = joint_parents_vec[bIdx];
    if (aIdx < 0)
    {
        return;
    }

    // Find the parent world transform of joint a:
    Math::Transform aParentWorldTransform = Math::Transform::Identity;
    int32_t idx = joint_parents_vec[aIdx];
    while (idx >= 0)
    {
        aParentWorldTransform = aParentWorldTransform * pose[idx];
        idx = joint_parents_vec[idx];
    }
    aParentWorldTransform = aParentWorldTransform * rootTransform;

    // Compute world space transforms of a and b:
    Math::Transform aWorld = pose[aIdx] * aParentWorldTransform;
    Math::Transform bWorld = pose[bIdx] * aWorld;

    auto abLocal = aWorld.GetRotation().RotateVectorInverse(bWorld.GetTranslation() - aWorld.GetTranslation());
    auto atLocal = aWorld.GetRotation().RotateVectorInverse(target - aWorld.GetTranslation());
    auto deltaRLocal = Math::Quaternion::NLerp(
        Math::Quaternion::Identity,
        Math::Quaternion::FromRotationBetweenVectors(abLocal, atLocal),
        weight);
    pose[aIdx].SetRotation(deltaRLocal * pose[aIdx].GetRotation());
}
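The two stages of `TwoBoneIk` are easiest to see in the plane. The sketch below is a standalone NumPy illustration of the same scheme; the function and names are ours for illustration, not the library's API, and the C++ version additionally walks the joint hierarchy, blends by `weight`, and disambiguates the bend plane with `hintOffset`.

```python
# 2D illustration of two-stage two-bone IK (sketch, not the library API).
import numpy as np

def two_bone_ik_2d(a, b, c, t):
    """Return (a, b', c') with the end joint c' moved onto target t, if reachable."""
    l_ab = np.linalg.norm(b - a)
    l_bc = np.linalg.norm(c - b)
    # Stage 1: clamp |at| to the reachable range and choose the elbow angle
    # from the law of cosines so that |ac| == |at|.
    l_at = np.clip(np.linalg.norm(t - a), 1e-4, (l_ab + l_bc) * 0.999)
    cos_abc = (l_ab**2 + l_bc**2 - l_at**2) / (2.0 * l_ab * l_bc)
    theta_abc = np.arccos(np.clip(cos_abc, -1.0, 1.0))  # interior angle at b
    # Lay the chain out along +x with that elbow bend.
    b_new = a + np.array([l_ab, 0.0])
    c_new = b_new + l_bc * np.array([np.cos(np.pi - theta_abc), np.sin(np.pi - theta_abc)])
    # Stage 2: rotate the whole chain about a so that c lands on the ray a -> t.
    ang = np.arctan2(t[1] - a[1], t[0] - a[0]) - np.arctan2(c_new[1] - a[1], c_new[0] - a[0])
    rot = np.array([[np.cos(ang), -np.sin(ang)], [np.sin(ang), np.cos(ang)]])
    return a, a + rot @ (b_new - a), a + rot @ (c_new - a)

a, b, c = np.array([0.0, 0.0]), np.array([0.0, -0.4]), np.array([0.0, -0.8])
t = np.array([0.3, -0.5])
_, b2, c2 = two_bone_ik_2d(a, b, c, t)
print(np.allclose(c2, t))  # True: t is within the chain's reach
```

The C++ implementation performs the corresponding two corrections directly on the joint-local quaternions, rotating about an axis perpendicular to the chain's bend plane.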
================================================
FILE: MotionCorrection/src/cpp/AnimProcessing/TrajectoryCorrector.cpp
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#include "TrajectoryCorrector.h"
#include <algorithm>
#include <cmath>
#include <vector>

static void removeRows(
    Eigen::SparseMatrix<double>& M,
    Eigen::MatrixXd* v,
    int minCoeffs)
{
    Eigen::SparseMatrix<double, Eigen::RowMajor> rowMajorMat = M;
    rowMajorMat.makeCompressed(); // Ensure compressed format
    std::vector<Eigen::Triplet<double>> triplets;
    triplets.reserve(rowMajorMat.nonZeros());
    int newRow = 0;
    for (int i = 0; i < rowMajorMat.outerSize(); ++i)
    {
        // Get nonzero count via outerIndexPtr (compressed format only)
        int nnz = rowMajorMat.outerIndexPtr()[i + 1] - rowMajorMat.outerIndexPtr()[i];
        if (nnz >= minCoeffs)
        {
            // Iterate through nonzeros in this row
            for (Eigen::SparseMatrix<double, Eigen::RowMajor>::InnerIterator it(rowMajorMat, i); it; ++it)
            {
                triplets.emplace_back(newRow, it.col(), it.value());
            }
            if (v)
            {
                v->row(newRow) = v->row(i);
            }
            newRow++;
        }
    }
    M = Eigen::SparseMatrix<double>(newRow, M.cols());
    M.setFromTriplets(triplets.begin(), triplets.end());
    if (v)
    {
        v->conservativeResize(newRow, v->cols());
    }
}

static void multVelWeights(
    Eigen::SparseMatrix<double>& V,
    Eigen::MatrixXd* v_rhs,
    const Eigen::VectorXd& velocityWeights)
{
    Eigen::SparseMatrix<double, Eigen::RowMajor> rowMajorMat = V;
    rowMajorMat.makeCompressed(); // Ensure compressed format
    std::vector<Eigen::Triplet<double>> triplets;
    triplets.reserve(rowMajorMat.nonZeros());
    for (int i = 0; i < rowMajorMat.outerSize(); ++i)
    {
        // Iterate through nonzeros in this row. The weight for a velocity row
        // is looked up from the column of its first nonzero (the earlier of
        // the two frames it differences):
        Eigen::SparseMatrix<double, Eigen::RowMajor>::InnerIterator it(rowMajorMat, i);
        if (!it)
        {
            continue; // empty row: nothing to scale
        }
        double vel_weight = velocityWeights[it.col()];
        for (; it; ++it)
        {
            triplets.emplace_back(i, it.col(), it.value() * vel_weight);
        }
        if (v_rhs)
        {
            (*v_rhs).row(i) = (*v_rhs).row(i) * vel_weight;
        }
    }
    // Write the scaled coefficients back into V (without this the weights
    // would only ever be applied to the right-hand side):
    Eigen::SparseMatrix<double> scaled(V.rows(), V.cols());
    scaled.setFromTriplets(triplets.begin(), triplets.end());
    V = scaled;
}

void TrajectoryCorrector::computeDiffMats(
    Eigen::SparseMatrix<double>& V,
    Eigen::SparseMatrix<double>& A,
    uint32_t N,
    const Eigen::VectorXd& velocityWeights,
    Eigen::MatrixXd* v_rhs,
    Eigen::MatrixXd* a_rhs)
{
    std::vector<Eigen::Triplet<double>> tripletList;

    // Identity matrix:
    tripletList.clear();
    Eigen::SparseMatrix<double> I(N, N);
    for (uint32_t i = 0; i < N; ++i)
    {
        tripletList.emplace_back(i, i, 1);
    }
    I.setFromTriplets(tripletList.begin(), tripletList.end());

    // T is a time translation operator: it gives you the value on the next frame.
    // Leave the last row blank because that's the end of the timeline.
    tripletList.clear();
    Eigen::SparseMatrix<double> T(N, N);
    Eigen::MatrixXd t_rhs;
    for (uint32_t i = 0; i < N - 1; ++i)
    {
        // next frame is i+1
        tripletList.emplace_back(i, i + 1, 1.0);
    }
    T.setFromTriplets(tripletList.begin(), tripletList.end());

    // v = Tx + t_rhs - x;
    // v = (T - I)x + t_rhs;
    V = T - I;
    if (v_rhs)
    {
        *v_rhs = t_rhs;
    }
    removeRows(V, v_rhs, 2);

    // a = -x + 2 (T x + t_rhs) - (T (T x + t_rhs) + t_rhs)
    // a = (-I + 2 T - T^2) x + t_rhs - T t_rhs
    A = 2 * T - I - T * T;
    if (a_rhs)
    {
        *a_rhs = t_rhs - T * t_rhs;
    }
    removeRows(A, a_rhs, 3);

    if (velocityWeights.size() > 0)
    {
        multVelWeights(V, v_rhs, velocityWeights);
    }
}
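// Worked example (comments only): for N = 4 frames, computeDiffMats builds
//   V = T - I, with rows holding fewer than 2 nonzeros removed:
//       [ -1  1  0  0 ]
//       [  0 -1  1  0 ]      (3 velocity rows)
//       [  0  0 -1  1 ]
//   A = 2T - I - T^2, with rows holding fewer than 3 nonzeros removed:
//       [ -1  2 -1  0 ]      (2 acceleration rows)
//       [  0 -1  2 -1 ]
// so V x stacks the per-frame forward differences of the trajectory x,
// and A x stacks its second differences.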
TrajectoryCorrector::TrajectoryCorrector(
    const Eigen::VectorXd& margins,
    float pos_weight,
    float vel_weight,
    float acc_weight,
    const Eigen::VectorXd& velocityWeights,
    uint32_t admm_iters
) :
    m_admm_iters(admm_iters)
{
    // This class is used to modify a trajectory to hit specific values at
    // specific frames, while respecting the following soft constraints:
    //   * Preserve the original positions
    //   * Preserve the original velocities
    //   * Preserve the original accelerations
    // The weights of these soft constraints are specified in "pos_weight" etc.
    // This is posed as a minimization problem:
    //   E(x) = pos_weight * |x - x_orig|^2 + vel_weight * |V x - V x_orig|^2 + acc_weight * |A x - A x_orig|^2
    // where you minimize E(x) subject to specified values at indices where "mask"
    // is equal to 1. V is a matrix that computes the N-1 velocities between frame n-1 and frame n,
    // and A computes the N-2 accelerations associated with frames n-1, n and n+1.
    // In addition to this, there are constraints where the trajectory is allowed to
    // deviate from the target points by a maximum margin. The "margins" input to this
    // constructor specifies what type of constraint is active on a particular frame:
    //   margins[i] < 0  ==> unconstrained
    //   margins[i] == 0 ==> pinned on this frame
    //   margins[i] > 0  ==> can deviate within the margin
    // The optimization problem is solved using ADMM, i.e. following equations
    // 8, 9 and 10 in this paper:
    // https://mattoverby.net/files/admm-pd-overby17.pdf
    uint32_t N = uint32_t(margins.rows());
    for (uint32_t i = 0; i < N; ++i)
    {
        if (margins[i] > 0)
        {
            m_margin_locs.push_back(i);
            m_margin_vals.push_back(margins[i]);
        }
        if (margins[i] == 0)
        {
            m_constrained_locs.push_back(i);
        }
        else
        {
            m_unconstrained_locs.push_back(i);
        }
    }

    Eigen::SparseMatrix<double> V, A;
    computeDiffMats(V, A, N, velocityWeights);

    // build an identity matrix:
    std::vector<Eigen::Triplet<double>> tripletList;
    Eigen::SparseMatrix<double> I(N, N);
    for (uint32_t i = 0; i < N; ++i)
    {
        tripletList.emplace_back(i, i, 1.0f);
    }
    I.setFromTriplets(tripletList.begin(), tripletList.end());

    /*
    self.N = (
        self.pos_weight * torch.diag_embed(torch.full_like(interp_mask, 1))
        + self.vel_weight * torch.matmul(self.V.T, self.V)
        + self.acc_weight * torch.matmul(self.A.T, self.A)
    )
    */
    m_N = pos_weight * I + vel_weight * (V.transpose() * V) + acc_weight * (A.transpose() * A);

    double diagMax = 0;
    for (uint32_t i = 0; i < N; ++i)
    {
        diagMax = std::max(m_N.coeff(i, i), diagMax);
    }
    m_admm_stepsize = 0.5f * sqrtf(float(diagMax));

    /*
    M = (
        self.N
        + self.admm_stepsize * torch.matmul(self.S.T, self.S)
    )
    */
    tripletList.clear();
    Eigen::SparseMatrix<double> M(N, N);
    for (auto i : m_margin_locs)
    {
        tripletList.emplace_back(i, i, m_admm_stepsize);
    }
    M.setFromTriplets(tripletList.begin(), tripletList.end());
    M += m_N;

    /*
    self.lhsmat = torch.matmul(self.U.T, torch.matmul(self.M, self.U))
    self.lhsmat_inv = torch.inverse(self.lhsmat)
    */
    tripletList.clear();
    Eigen::SparseMatrix<double> S(m_unconstrained_locs.size(), N);
    for (uint32_t i = 0; i < m_unconstrained_locs.size(); ++i)
    {
        uint32_t ifull = m_unconstrained_locs[i];
        tripletList.emplace_back(i, ifull, 1.0f);
    }
    S.setFromTriplets(tripletList.begin(), tripletList.end());
    M = S * M * S.transpose();
    if (m_unconstrained_locs.size())
    {
        m_system_lu.compute(M);
    }
}
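// Illustrative sketch (not compiled; not part of the library): constructing
// a corrector for a 5-frame channel where frame 0 is pinned, frame 2 may
// deviate by up to 0.1 units, and the remaining frames are unconstrained.
// The weights here are arbitrary example values:
#if 0
Eigen::VectorXd margins(5);
margins << 0.0, -1.0, 0.1, -1.0, -1.0;
TrajectoryCorrector corrector(margins, /*pos_weight=*/0.001f,
                              /*vel_weight=*/1.0f, /*acc_weight=*/10.0f);
#endif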
void TrajectoryCorrector::Interpolate(
    Eigen::MatrixXd& x,
    const Eigen::MatrixXd& observations,
    const Eigen::MatrixXd& ref_positions
) const
{
    if (m_constrained_locs.empty() && m_margin_locs.empty())
    {
        x = ref_positions;
        return;
    }
    uint32_t numCols = uint32_t(x.cols());
    if (m_margin_locs.empty())
    {
        x_update(x, Eigen::MatrixXd(0, numCols), Eigen::MatrixXd(0, numCols), ref_positions, observations);
    }
    else
    {
        x = ref_positions;
        Eigen::MatrixXd z(m_margin_locs.size(), numCols);
        Eigen::MatrixXd z_t(m_margin_locs.size(), numCols);
        Eigen::MatrixXd u(m_margin_locs.size(), numCols);
        for (uint32_t i = 0; i < m_margin_locs.size(); ++i)
        {
            for (uint32_t j = 0; j < numCols; ++j)
            {
                z_t(i, j) = observations(m_margin_locs[i], j);
                z(i, j) = ref_positions(m_margin_locs[i], j);
                u(i, j) = 0;
            }
        }
        for (uint32_t i = 0; i < m_admm_iters; ++i)
        {
            x_update(x, z, u, ref_positions, observations);
            z_update(z, x, z_t, u);
            u_update(u, x, z);
        }
    }
}
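// Illustrative sketch (not compiled; continues the constructor example
// above): warping a 1-column trajectory so it hits the observations on
// constrained frames while preserving the shape of ref_positions elsewhere:
#if 0
Eigen::MatrixXd ref_positions(5, 1), observations(5, 1), result(5, 1);
// ... fill ref_positions with the original curve and observations with the
// per-frame targets (only rows whose margin is >= 0 are consulted) ...
corrector.Interpolate(result, observations, ref_positions);
#endif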
void TrajectoryCorrector::x_update(
    Eigen::MatrixXd& x,
    const Eigen::MatrixXd& z,
    const Eigen::MatrixXd& u,
    const Eigen::MatrixXd& x_t, // reference positions - defines the original shape of the curve that we want to preserve
    const Eigen::MatrixXd& x_o  // target positions for constraints
) const
{
    uint32_t numCols = uint32_t(x.cols());

    // Here's what we're minimizing with ADMM:
    //   min f(x) + g(z)
    //   s.t. A x + B z = c
    // Make these choices so that z = S x:
    //   A = S, B = -I, c = 0
    //
    // g(z) = infinity when it's too far away from z_target, zero otherwise
    //
    // f(x) penalizes deviations in position, velocity and acceleration
    // from a reference trajectory:
    //
    //   f(x) = 1/2 (
    //       kx |I x - x_t|^2 +
    //       kv |V x - v_t|^2 +
    //       ka |A x - a_t|^2
    //   )
    //
    // It's also infinite when components of x deviate from their target
    // values where they're pinned...
    // Substituting the matrices into the standard ADMM update rules gives us this:
    //   x{n+1} = argmin(f(x) + ρ/2 |S x - z{n} + u{n}|^2)
    //   z{n+1} = argmin(g(z) + ρ/2 |S x{n+1} - z + u{n}|^2)
    //   u{n+1} = u{n} + (S x{n+1} - z{n+1})
    //
    // x update:
    //
    //   x{n+1} = argmin 1/2 (
    //       kx |I x - x_t|^2 +
    //       kv |V x - v_t|^2 +
    //       ka |A x - a_t|^2 +
    //       ρ  |S x - d|^2
    //   )
    //   d = (z{n} - u{n})
    // Rewrite in a friendlier way:
    //   |A x - b|^2 = x^T A^T A x - 2 x^T A^T b + C
    //   1/2 (
    //       kx (x^T x - 2 x^T x_t) +
    //       kv (x^T V^T V x - 2 x^T V^T v_t) +
    //       ka (x^T A^T A x - 2 x^T A^T a_t) +
    //       ρ  (x^T S^T S x - 2 x^T S^T d)
    //   ) + C
    //
    //   1/2 x^T (kx I + kv V^T V + ka A^T A + ρ S^T S) x
    //   - x^T (kx x_t + kv V^T v_t + ka A^T a_t + ρ S^T d)
    //   + C
    //
    // voila:
    //   M = kx I + kv V^T V + ka A^T A + ρ S^T S
    //   r = kx x_t + kv V^T v_t + ka A^T a_t + ρ S^T d
    //   E = 1/2 x^T M x - x^T r + C

    /*
    r = (
        torch.matmul(self.N, x_t - x_o_filtered)
        + self.admm_stepsize * torch.matmul(self.S.T, - u + z)
    )
    */
    Eigen::MatrixXd x_diffs(x_t);
    for (auto i : m_constrained_locs)
    {
        for (uint32_t j = 0; j < numCols; ++j)
        {
            x_diffs(i, j) = x_diffs(i, j) - x_o(i, j);
        }
    }
    Eigen::MatrixXd r = m_N * x_diffs;
    for (uint32_t i = 0; i < m_margin_locs.size(); ++i)
    {
        uint32_t ifull = m_margin_locs[i];
        for (uint32_t j = 0; j < numCols; ++j)
        {
            r(ifull, j) = r(ifull, j) + m_admm_stepsize * (z(i, j) - u(i, j));
        }
    }

    // Solve with respect to pin constraints:
    //   x = U x_r + x_o
    //   E = 1/2 (U x_r + x_o)^T M (U x_r + x_o) - (U x_r + x_o)^T r + C
    //   E = 1/2 (x_r^T U^T + x_o^T) M (U x_r + x_o) - (x_r^T U^T + x_o^T) r + C
    //   E = 1/2 (x_r^T U^T M (U x_r + x_o) + x_o^T M (U x_r + x_o)) - x_r^T U^T r - x_o^T r + C
    //   E = 1/2 (x_r^T U^T M U x_r) + x_r^T U^T (M x_o - r) + C
    // minimized when x_r solves this equation:
    //   U^T M U x_r + U^T (M x_o - r) = 0
    //   x_r = (U^T M U)^-1 U^T (r - M x_o)

    // collapse r down to unconstrained variable set:
    // rhs = torch.matmul(self.U.T, r)
    uint32_t numRows_reduced = uint32_t(m_unconstrained_locs.size());
    Eigen::MatrixXd r_reduced(numRows_reduced, numCols);
    for (uint32_t i = 0; i < numRows_reduced; ++i)
    {
        uint32_t ifull = m_unconstrained_locs[i];
        for (uint32_t j = 0; j < numCols; ++j)
        {
            r_reduced(i, j) = r(ifull, j);
        }
    }

    // solve system:
    // x_r = torch.matmul(self.lhsmat_inv, rhs)
    Eigen::MatrixXd result;
    if (m_unconstrained_locs.size())
    {
        result = m_system_lu.solve(r_reduced);
    }

    // map back to full variable set:
    // return torch.matmul(self.U, x_r) + x_o_filtered
    for (uint32_t i = 0; i < numRows_reduced; ++i)
    {
        uint32_t ifull = m_unconstrained_locs[i];
        for (uint32_t j = 0; j < numCols; ++j)
        {
            x(ifull, j) = result(i, j);
        }
    }
    for (auto i : m_constrained_locs)
    {
        for (uint32_t j = 0; j < numCols; ++j)
        {
            x(i, j) = x_o(i, j);
        }
    }
}
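// The z update below is a projection: g(z) is an indicator function that is
// zero inside a ball of radius margin around the target z_t and infinite
// outside it, so argmin(g(z) + ρ/2 |S x - z + u|^2) is simply S x + u clamped
// back onto that ball. E.g. with z_t = 0, margin = 1 and S x + u = (0, 3),
// the diff has norm 3 > 1 and is rescaled by 1/3, giving z = (0, 1).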
void TrajectoryCorrector::z_update(
    Eigen::MatrixXd& z,
    const Eigen::MatrixXd& x,
    const Eigen::MatrixXd& z_t,
    const Eigen::MatrixXd& u
) const
{
    uint32_t numCols = uint32_t(z.cols());
    for (uint32_t i = 0; i < m_margin_locs.size(); ++i)
    {
        // z_diffs = S x + u - z_t
        uint32_t ifull = m_margin_locs[i];
        for (uint32_t j = 0; j < numCols; ++j)
        {
            z(i, j) = x(ifull, j) + u(i, j) - z_t(i, j);
        }
        // find the norm of the current z diff vector:
        double z_diff_norm = 0.0;
        for (uint32_t j = 0; j < numCols; ++j)
        {
            double z_diff = z(i, j);
            z_diff_norm += z_diff * z_diff;
        }
        z_diff_norm = sqrt(z_diff_norm);
        // if the norm is greater than the margin size, we need to rescale
        // the diff:
        if (z_diff_norm > m_margin_vals[i])
        {
            for (uint32_t j = 0; j < numCols; ++j)
            {
                z(i, j) = z(i, j) * m_margin_vals[i] / z_diff_norm;
            }
        }
        // add the diff back on to the target:
        for (uint32_t j = 0; j < numCols; ++j)
        {
            z(i, j) = z_t(i, j) + z(i, j);
        }
    }
}

void TrajectoryCorrector::u_update(
    Eigen::MatrixXd& u,
    const Eigen::MatrixXd& x,
    const Eigen::MatrixXd& z
) const
{
    uint32_t numCols = uint32_t(z.cols());
    // u += S x - z
    for (uint32_t i = 0; i < m_margin_locs.size(); ++i)
    {
        uint32_t ifull = m_margin_locs[i];
        for (uint32_t j = 0; j < numCols; ++j)
        {
            u(i, j) += x(ifull, j) - z(i, j);
        }
    }
}

================================================
FILE: MotionCorrection/src/cpp/AnimProcessing/TrajectoryCorrector.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

#include <Eigen/Dense>
#include <Eigen/Sparse>
#include <cstdint>
#include <vector>

class TrajectoryCorrector
{
public:
    static void computeDiffMats(
        Eigen::SparseMatrix<double>& V,
        Eigen::SparseMatrix<double>& A,
        uint32_t N,
        const Eigen::VectorXd& velocityWeights = Eigen::VectorXd(),
        Eigen::MatrixXd* v_rhs = nullptr,
        Eigen::MatrixXd* a_rhs = nullptr
    );

    TrajectoryCorrector(
        const Eigen::VectorXd& margins,
        float pos_weight,
        float vel_weight,
        float acc_weight,
        const Eigen::VectorXd& velocityWeights = Eigen::VectorXd(),
        uint32_t admm_iters = 100
    );

    void Interpolate(
        Eigen::MatrixXd& ret,
        const Eigen::MatrixXd& observations,
        const Eigen::MatrixXd& ref_positions
    ) const;

    void x_update(
        Eigen::MatrixXd& x,
        const Eigen::MatrixXd& z,
        const Eigen::MatrixXd& u,
        const Eigen::MatrixXd& x_t,
        const Eigen::MatrixXd& x_o
    ) const;

    void z_update(
        Eigen::MatrixXd& z,
        const Eigen::MatrixXd& x,
        const Eigen::MatrixXd& z_t,
        const Eigen::MatrixXd& u
    ) const;

    void u_update(
        Eigen::MatrixXd& u,
        const Eigen::MatrixXd& x,
        const Eigen::MatrixXd& z
    ) const;

    float admm_stepsize() const { return m_admm_stepsize; }
    const std::vector<uint32_t>& margin_locs() { return m_margin_locs; }

private:
    Eigen::SparseMatrix<double> m_N;
    Eigen::SparseLU<Eigen::SparseMatrix<double>> m_system_lu;
    uint32_t m_admm_iters;
    std::vector<uint32_t> m_margin_locs;
    std::vector<double> m_margin_vals;
    std::vector<uint32_t> m_unconstrained_locs;
    std::vector<uint32_t> m_constrained_locs;
    float m_admm_stepsize;
};

================================================
FILE: MotionCorrection/src/cpp/AnimProcessing/Utility.cpp
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#include "TrajectoryCorrector.h"
#include "InverseKinematics.h"
#include "Utility.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <map>
#include <vector>

using Pose = std::vector<Math::Transform>;

static const float pos_weight = 0.001f;
static const float vel_weight = 1.0f;
static const float acc_weight = 10.0f;

namespace
{
    // Enable with: MOTIONCORRECTION_DEBUG_INTERVALS=1
    // Default: off (no Interval printing).
    bool DebugPrintIntervalsEnabled()
    {
        const char* v = std::getenv("MOTIONCORRECTION_DEBUG_INTERVALS");
        if (v == nullptr || v[0] == '\0')
        {
            return false;
        }
        // Treat "0" as false; any other non-empty value enables.
        return v[0] != '0';
    }
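    // Worked example for the interval helpers below: with contactThreshold = 0.5,
    //   contacts = [0, 1, 1, 1, 0]  and  mask = [0, 0, 1, 0, 0],
    // ComputeContactIntervals first zeroes the contacts on masked frames,
    // giving [0, 1, 0, 1, 0], and then extracts the half-open intervals
    // [1, 2) and [3, 4). Each of these touches the masked frame 2 on exactly
    // one side, so FilterContactIntervals keeps both for two-bone contacts
    // (which only drop intervals masked on both sides) but drops both for
    // one-bone contacts (which drop intervals masked on either side).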
    void FilterContactIntervals(
        std::vector<std::pair<int, int>>& contactIntervals,
        const std::vector<float>& mask,
        bool one_bone_contact = false)
    {
        std::vector<size_t> keepIntervals;
        for (size_t i = 0; i < contactIntervals.size(); ++i)
        {
            const auto& interval = contactIntervals[i];
            bool startConstrained = (interval.first != 0 && mask[interval.first - 1]);
            bool endConstrained = (interval.second != (int)mask.size() && mask[interval.second]);
            if (one_bone_contact)
            {
                if (startConstrained || endConstrained)
                {
                    continue;
                }
            }
            else
            {
                // If both the start and end of the contact interval are masked,
                // there's no way we can correct the contact without popping, so
                // let's filter these out:
                if (startConstrained && endConstrained)
                {
                    continue;
                }
            }
            keepIntervals.push_back(i);
        }
        for (size_t i = 0; i < keepIntervals.size(); ++i)
        {
            contactIntervals[i] = contactIntervals[keepIntervals[i]];
        }
        contactIntervals.resize(keepIntervals.size());
    }

    std::vector<std::pair<int, int>> ComputeContactIntervals(
        const std::vector<float>& contacts,
        const std::vector<float>& mask,
        float contactThreshold)
    {
        // turn off the contacts for all frames that are constrained/masked:
        std::vector<float> contactsNoMask = contacts;
        for (size_t i = 0; i < mask.size(); ++i)
        {
            if (mask[i])
            {
                contactsNoMask[i] = 0;
            }
        }
        // Find intervals that are in contact:
        std::vector<std::pair<int, int>> contactIntervals;
        int start = -1;
        for (int frame = 0; frame < (int)mask.size(); ++frame)
        {
            bool isContact = contactsNoMask[frame] > contactThreshold;
            if (isContact && start == -1)
            {
                start = frame;
            }
            else if (!isContact && start != -1)
            {
                contactIntervals.emplace_back(start, frame);
                start = -1;
            }
        }
        // Close the final interval if needed:
        if (start != -1)
        {
            contactIntervals.emplace_back(start, (int)mask.size());
        }
        return contactIntervals;
    }

    void FindContactPoints(
        std::vector<Math::Vector>& points,
        std::vector<float>& inContact,
        const std::vector<int32_t>& joint_parents_vec,
        int32_t jointIndex,
        const std::vector<Pose>& poses,
        const std::vector<std::pair<int, int>>& contactIntervals,
        const std::vector<float>& mask,
        size_t frameCount,
        float minHeight)
    {
        // Find a representative frame for each interval.
        // If the interval starts after a masked frame, use the start
        // of the interval; if it ends before a mask, use the end;
        // otherwise use the middle frame.
inContact.clear(); inContact.resize(frameCount, 0); points.clear(); points.resize(frameCount); for (size_t i = 0; i < contactIntervals.size(); ++i) { const auto& interval = contactIntervals[i]; int frame = -1; bool startConstrained = (interval.first != 0 && mask[interval.first - 1]); bool endConstrained; endConstrained = (interval.second != mask.size() && mask[interval.second]); // Debug output (opt-in via env var) if (DebugPrintIntervalsEnabled()) { std::cout << "Interval " << i << ": start=" << interval.first << ", end=" << interval.second << ", startConstrained=" << startConstrained << ", endConstrained=" << endConstrained << std::endl; } if(startConstrained) { // If the interval starts on a constraint, use the constrained frame // as a target (doing this modulo mask.size() in case we're looping) frame = interval.first - 1; } else if (endConstrained) { // If the interval ends on a constraint, use the constrained frame // as a target: frame = interval.second; } else { // Otherwise use the midpoint of the interval: frame = (interval.first + interval.second) / 2; } // get the target point: Math::Vector target = Animation::JointLocalToGlobal(joint_parents_vec, jointIndex, poses[frame]).GetTranslation(); for(int i = interval.first; i < interval.second; ++i) { Math::Vector framePt = Animation::JointLocalToGlobal(joint_parents_vec, jointIndex, poses[i]).GetTranslation(); inContact[i] = 1; points[i] = target; if (!startConstrained && !endConstrained) { points[i].SetY(std::max(framePt.GetY(), minHeight)); // std::cout << " Frame " << i << ": SetY with framePt.GetY()=" << framePt.GetY() // << ", minHeight=" << minHeight << std::endl; } } } } float TargetReachFalloff( const std::vector& joint_parents_vec, const Pose& defaultPose, int32_t jointIndex, Animation::IKType ikType, const Math::Vector& target, const Pose& pose, const Math::Transform& rootTx = Math::Transform::Identity) { float maxReach = defaultPose[jointIndex].GetTranslation().GetLength3(); if (ikType == Animation::IKType::kTwoBone) { jointIndex = joint_parents_vec[jointIndex]; ASSERT(jointIndex > -1); maxReach += defaultPose[jointIndex].GetTranslation().GetLength3(); } // Get base joint world Tx jointIndex = joint_parents_vec[jointIndex]; ASSERT(jointIndex > -1); const auto worldTx = Animation::JointLocalToGlobal(joint_parents_vec, jointIndex, pose, rootTx); // Gaussian falloff float targetDist = target.GetDistance3(worldTx.GetTranslation()); float tmp = Math::Max(targetDist / maxReach - 0.99f, 0.f) / 0.01f; tmp = tmp * tmp; return std::exp(-2.f * tmp * tmp); } void CorrectHipsY( std::vector& poses, const std::vector& targetPoses, const std::vector& fullBodyMask, const std::vector& contacts, float contactThreshold ) { // Correct the y coordinates of the root. auto N = poses.size(); Eigen::MatrixXd x(N, 1); Eigen::MatrixXd observations(N, 1); Eigen::MatrixXd xfixed(N, 1); // Fill in the initial trajectory (x) and the values we want to hit when we // warp it (observations): Eigen::VectorXd yCorrectMargins(N); for(size_t frame = 0; frame < N; ++frame) { yCorrectMargins[frame] = fullBodyMask[frame] ? 
0.0f : -1.0f; x(frame, 0) = ((float*)&poses[frame][0].GetTranslation())[1]; observations(frame, 0) = ((float*)&targetPoses[frame][0].GetTranslation())[1]; } TrajectoryCorrector ycorrector( yCorrectMargins, pos_weight * 10, vel_weight, acc_weight * 0.1f ); ycorrector.Interpolate( xfixed, observations, x ); // fill channel again: for (uint32_t frame = 0; frame < N; ++frame) { ((float*)&poses[frame][0].GetTranslation())[1] = float(xfixed(frame, 0)); } } void SmoothChannels( Eigen::MatrixXd &x, const std::vector& mask ) { for( uint32_t i=0; i < mask.size(); ++i) { uint32_t i_prev = i == 0 ? 0 : i-1; uint32_t i_next = std::min(uint32_t(i+1), uint32_t(mask.size()-1)); if(i > 0 && mask[i] > 0 && mask[i_prev] == 0) { // if the previous frame is unconstrained and the current frame is constrained, // replace the current frame with the average of its neighbors: for(long j=0; j < x.cols(); ++j) { x(i, j) = 0.5f * (x(i_prev, j) + x(i_next, j)); } } if(mask[i] > 0 && mask[i_next] == 0) { // if the next frame is unconstrained and the current frame is constrained, // replace the current frame with the average of its neighbors: for(long j=0; j < x.cols(); ++j) { x(i, j) = 0.5f * (x(i_prev, j) + x(i_next, j)); } } } } void CorrectHipsXZ( std::vector& poses, const std::vector& targetPoses, const std::vector& fullBodyMask, const std::vector& rootMask, const std::vector& endEffectorPins, const Eigen::VectorXd& velocity_weights, float root_margin ) { auto N = poses.size(); Eigen::VectorXd margins(N); for( size_t i = 0; i < N; ++i ) { margins[i] = fullBodyMask[i] ? 0.0f : -1.0f; } std::vector rootCombinedMask(N, 0.0f); for(size_t i = 0; i < N; ++i) { rootCombinedMask[i] = (fullBodyMask[i] > 0) || (rootMask[i] > 0); if(rootMask[i] > 0 && margins[i] != 0) { margins[i] = root_margin; } for (auto& c : endEffectorPins) { if (c.contactMask[i] && margins[i] != 0) { margins[i] = root_margin; } } } TrajectoryCorrector xzcorrector( margins, pos_weight, vel_weight, acc_weight, velocity_weights ); // Enforce pose constraints on root xz trajectory: Eigen::MatrixXd x(N, 2); Eigen::MatrixXd observations(N, 2); Eigen::MatrixXd x_fixed(N, 2); observations.setZero(); for (uint32_t frame = 0; frame < N; ++frame) { x(frame, 0) = ((float*)&poses[frame][0].GetTranslation())[0]; x(frame, 1) = ((float*)&poses[frame][0].GetTranslation())[2]; observations(frame, 0) = ((float*)&targetPoses[frame][0].GetTranslation())[0]; observations(frame, 1) = ((float*)&targetPoses[frame][0].GetTranslation())[2]; } SmoothChannels(x, rootCombinedMask); xzcorrector.Interpolate( x_fixed, observations, x ); // fill channels again: for (uint32_t frame = 0; frame < N; ++frame) { ((float*)&poses[frame][0].GetTranslation())[0] = float(x_fixed(frame, 0)); ((float*)&poses[frame][0].GetTranslation())[2] = float(x_fixed(frame, 1)); } } void CorrectRotationsForBone( std::vector& poses, const std::vector& targetPoses, const std::vector& mask, const TrajectoryCorrector& corrector, int boneIdx, bool performChannelSmoothing) { auto N = poses.size(); Eigen::MatrixXd x(N, 1); Eigen::MatrixXd observations(N, 1); observations.setZero(); Eigen::MatrixXd x_fixed(N, 1); // Quaternion components can flip when they pass through 180 degree // rotations, so let's convert all the quaternions in this channel to // the forward/up vector representation, modify them, then convert back // to quaternions: // convert time series to 6d forward/up: std::vector forwardUp(6 * N); std::vector targetForwardUp(6 * N); for (uint32_t frame = 0; frame < N; ++frame) { auto q = 
poses[frame][boneIdx].GetRotation(); auto forward = q.ZAxis(); auto up = q.YAxis(); forwardUp[N * 0 + frame] = forward.GetX(); forwardUp[N * 1 + frame] = forward.GetY(); forwardUp[N * 2 + frame] = forward.GetZ(); forwardUp[N * 3 + frame] = up.GetX(); forwardUp[N * 4 + frame] = up.GetY(); forwardUp[N * 5 + frame] = up.GetZ(); q = targetPoses[frame][boneIdx].GetRotation(); forward = q.ZAxis(); up = q.YAxis(); targetForwardUp[N * 0 + frame] = forward.GetX(); targetForwardUp[N * 1 + frame] = forward.GetY(); targetForwardUp[N * 2 + frame] = forward.GetZ(); targetForwardUp[N * 3 + frame] = up.GetX(); targetForwardUp[N * 4 + frame] = up.GetY(); targetForwardUp[N * 5 + frame] = up.GetZ(); } // correct trajectories: for (uint32_t dim = 0; dim < 6; ++dim) { for (uint32_t frame = 0; frame < N; ++frame) { x(frame, 0) = forwardUp[N * dim + frame]; observations(frame, 0) = mask[frame] * targetForwardUp[N * dim + frame]; } if (performChannelSmoothing) { SmoothChannels(x, mask); } corrector.Interpolate( x_fixed, observations, x ); // fill channel again: for (uint32_t frame = 0; frame < N; ++frame) { forwardUp[N * dim + frame] = float(x_fixed(frame, 0)); } } for (uint32_t frame = 0; frame < N; ++frame) { Math::Vector forward = { forwardUp[N * 0 + frame] ,forwardUp[N * 1 + frame] ,forwardUp[N * 2 + frame] }; Math::Vector up = { forwardUp[N * 3 + frame] ,forwardUp[N * 4 + frame] ,forwardUp[N * 5 + frame] }; forward.Normalize3(); up.Normalize3(); poses[frame][boneIdx].SetRotation(Math::Quaternion::LookRotation(forward, up)); } } void CorrectJointRotations( std::vector& poses, const std::vector& targetPoses, const std::vector& fullBodyMask, const Eigen::VectorXd& velocity_weights ) { auto N = poses.size(); // Create a trajectory corrector for fixing the full body fullBodyMask positions: Eigen::VectorXd margins(N); for( size_t i = 0; i < N; ++i ) { margins[i] = fullBodyMask[i] ? 
0.0f : -1.0f; } TrajectoryCorrector corrector( margins, pos_weight * 10, vel_weight, acc_weight, velocity_weights ); for (uint32_t boneIdx = 0; boneIdx < poses[0].size(); ++boneIdx) { CorrectRotationsForBone( poses, targetPoses, fullBodyMask, corrector, boneIdx, true ); } } void DoEffectorIK( std::vector& poses, const std::vector& targetPoses, const std::vector& fullBodyMask, const std::vector& endEffectorPins, const std::vector& joint_parents_vec, const std::vector& defaultPose ) { // Apply IK for effector pins auto N = poses.size(); std::map> jointCorrectionMasks; std::vector ikFixedPoses = poses; for (auto& c : endEffectorPins) { auto jointIdx = c.jointIndex; if(jointCorrectionMasks[jointIdx].empty()) { // initialize to the full body constraint mask because we // want to constrain that anyway: jointCorrectionMasks[jointIdx] = fullBodyMask; } // Add a trajectory correction mask for the parent joint: auto parentIdx = joint_parents_vec[jointIdx]; if(jointCorrectionMasks[parentIdx].empty()) { // initialize to the full body constraint mask because we // want to constrain that anyway: jointCorrectionMasks[parentIdx] = fullBodyMask; } // Add a trajectory correction mask for its parent if this is // 2 bone IK: auto parentParentIdx = joint_parents_vec[parentIdx]; if(c.contactType == Animation::kTwoBone) { if(jointCorrectionMasks[parentParentIdx].empty()) { // initialize to the full body constraint mask because we // want to constrain that anyway: jointCorrectionMasks[parentParentIdx] = fullBodyMask; } } for (uint32_t fixFrame = 0; fixFrame < fullBodyMask.size(); ++fixFrame) { if (c.contactMask[fixFrame]) { const auto targetGlobalTransform = Animation::JointLocalToGlobal(joint_parents_vec, jointIdx, targetPoses[fixFrame]); // flag the parent joint as fixed in its correction mask: jointCorrectionMasks[parentIdx][fixFrame] = 1; switch(c.contactType) { case Animation::kOneBone: { IK::OneBoneIk( ikFixedPoses[fixFrame], Math::Transform::Identity, jointIdx, 1.0, targetGlobalTransform.GetTranslation(), joint_parents_vec ); break; } case Animation::kTwoBone: { // flag the parent parent joint as fixed in its correction mask: jointCorrectionMasks[parentParentIdx][fixFrame] = 1; IK::TwoBoneIk( ikFixedPoses[fixFrame], Math::Transform::Identity, jointIdx, 1.0, targetGlobalTransform.GetTranslation(), joint_parents_vec, c.hintOffset ); break; } } // now we need to fix things so the global rotation of the joint // matches the input: jointCorrectionMasks[jointIdx][fixFrame] = 1; auto parentGlobalTransform = Animation::JointLocalToGlobal(joint_parents_vec, parentIdx, ikFixedPoses[fixFrame]); ikFixedPoses[fixFrame][jointIdx].SetRotation( targetGlobalTransform.GetRotation() * parentGlobalTransform.GetRotation().GetConjugate() ); } } } // Applying the effector pin IK introduces popping into the animation, // so let's apply the interpolator to all the joints we modified so as to // line the trajectory up properly again: Eigen::VectorXd margins(N); for( auto &kv : jointCorrectionMasks) { for( size_t i = 0; i < N; ++i ) { margins[i] = kv.second[i] ? 
0.0f : -1.0f; } TrajectoryCorrector corrector(margins, pos_weight * 10, vel_weight, acc_weight); CorrectRotationsForBone( poses, ikFixedPoses, kv.second, corrector, kv.first, false ); } } void DoContactIK( std::vector& poses, const std::vector& fullBodyMask, const std::vector& contacts, const std::vector& endEffectorPins, const std::vector& joint_parents_vec, const std::vector& defaultPose, float contactThreshold, bool has_double_ankle_joints ) { auto N = poses.size(); Eigen::VectorXd margins = Eigen::VectorXd::Zero(N); // Apply IK to stabilize limbs on contacts std::map> jointCorrectionMasks; std::vector ikFixedPoses = poses; // Save original poses before any modifications (for double ankle correction later) const std::vector originalPoses = poses; // Track which frames were corrected for each 2-bone contact (for double ankle correction later) std::map> twoBoneContactFrames; auto addEndEffectorMask = [&](uint32_t jointIdx, uint32_t parentIdx, std::vector& jointMask) { auto it = std::find_if( endEffectorPins.begin(), endEffectorPins.end(), [&](const auto &c) { if(jointIdx == c.jointIndex) { return true; } return false; } ); if(it == endEffectorPins.end()) { // We could be correcting the toe joint, in which case we need to use // the parent joint instead: it = std::find_if( endEffectorPins.begin(), endEffectorPins.end(), [&](const auto &c) { if(parentIdx == c.jointIndex) { return true; } return false; } ); } if(it != endEffectorPins.end()) { const auto &msk = it->contactMask; for(size_t i=0; i < msk.size(); ++i) { if(msk[i]) { jointMask[i] = 1.0f; } } } }; // Process two bone contacts first: for (auto& c : contacts) { if(c.contactType != Animation::kTwoBone) { continue; } const auto jointIdx = c.jointIndex; auto parentIdx = joint_parents_vec[jointIdx]; auto parentParentIdx = joint_parents_vec[parentIdx]; auto jointMask = fullBodyMask; addEndEffectorMask(jointIdx, parentIdx, jointMask); // We'll actually be modifying 3 joints here: // * The two joints immediately up in the hierarchy because of the 2 bone IK // * The joint itself because we restore its original global rotation if(jointCorrectionMasks[parentIdx].empty()) { jointCorrectionMasks[parentIdx] = jointMask; } if(jointCorrectionMasks[parentParentIdx].empty()) { jointCorrectionMasks[parentParentIdx] = jointMask; } if(jointCorrectionMasks[jointIdx].empty()) { jointCorrectionMasks[jointIdx] = jointMask; } // Compute the intervals in which the joint is in contact with the floor: auto contactIntervals = ComputeContactIntervals(c.contactMask, jointMask, contactThreshold); FilterContactIntervals(contactIntervals, jointMask); std::vector contactPoints; std::vector inContact; FindContactPoints( contactPoints, inContact, joint_parents_vec, jointIdx, poses, contactIntervals, jointMask, c.contactMask.size(), c.minHeight ); for (uint32_t fixFrame = 0; fixFrame < fullBodyMask.size(); ++fixFrame) { if (inContact[fixFrame]) { auto target = contactPoints[fixFrame]; jointCorrectionMasks[parentIdx][fixFrame] = 1.0f; jointCorrectionMasks[parentParentIdx][fixFrame] = 1.0f; jointCorrectionMasks[jointIdx][fixFrame] = 1.0f; // Track this frame for double ankle correction later if (has_double_ankle_joints) { if (twoBoneContactFrames[jointIdx].empty()) twoBoneContactFrames[jointIdx].resize(fullBodyMask.size(), false); twoBoneContactFrames[jointIdx][fixFrame] = true; } // save the original global rotation of the joint: auto jointGlobalRotation = Animation::JointLocalToGlobal( joint_parents_vec, jointIdx, ikFixedPoses[fixFrame] ).GetRotation(); const float w = 
TargetReachFalloff(
                    joint_parents_vec,
                    defaultPose,
                    jointIdx,
                    c.contactType,
                    target,
                    ikFixedPoses[fixFrame]);

                // apply the 2 bone IK, blending the result back toward the
                // original rotations by the reach falloff weight w:
                auto origParentRotation = ikFixedPoses[fixFrame][parentIdx].GetRotation();
                auto origParentParentRotation = ikFixedPoses[fixFrame][parentParentIdx].GetRotation();
                IK::TwoBoneIk(
                    ikFixedPoses[fixFrame],
                    Math::Transform::Identity,
                    jointIdx,
                    1.0f,
                    target,
                    joint_parents_vec,
                    c.hintOffset);
                ikFixedPoses[fixFrame][parentIdx].SetRotation(
                    Math::Quaternion::SLerp(origParentRotation, ikFixedPoses[fixFrame][parentIdx].GetRotation(), w));
                ikFixedPoses[fixFrame][parentParentIdx].SetRotation(
                    Math::Quaternion::SLerp(origParentParentRotation, ikFixedPoses[fixFrame][parentParentIdx].GetRotation(), w));

                // restore the previous global rotation of this joint:
                auto parentGlobalRotation = Animation::JointLocalToGlobal(
                    joint_parents_vec,
                    parentIdx,
                    ikFixedPoses[fixFrame]).GetRotation();
                jointCorrectionMasks[jointIdx][fixFrame] = 1.0f;
                ikFixedPoses[fixFrame][jointIdx].SetRotation(
                    jointGlobalRotation * parentGlobalRotation.GetConjugate());
            }
        }
    }

    for (auto& kv : jointCorrectionMasks)
    {
        for (size_t i = 0; i < N; ++i)
        {
            margins[i] = kv.second[i] ? 0.0f : -1.0f;
        }
        TrajectoryCorrector corrector(margins, pos_weight * 10, vel_weight, acc_weight);
        CorrectRotationsForBone(poses, ikFixedPoses, kv.second, corrector, kv.first, false);
    }
    jointCorrectionMasks.clear();

    // Then process one bone contacts:
    for (auto& c : contacts)
    {
        if (c.contactType != Animation::kOneBone)
        {
            continue;
        }
        const auto jointIdx = c.jointIndex;
        auto parentIdx = joint_parents_vec[jointIdx];
        // We can't touch frames that have been constrained with full body constraints
        // or the end effector constraints for this joint, so let's combine fullBodyMask
        // with the end effector mask for this joint if it exists so we can use that
        // information later:
        auto jointMask = fullBodyMask;
        addEndEffectorMask(jointIdx, parentIdx, jointMask);
        // Add a trajectory correction mask for the parent joint:
        if (jointCorrectionMasks[parentIdx].empty())
        {
            jointCorrectionMasks[parentIdx] = jointMask;
        }
        // Compute the intervals in which the joint is in contact with the floor:
        auto contactIntervals = ComputeContactIntervals(c.contactMask, jointMask, contactThreshold);
        FilterContactIntervals(contactIntervals, jointMask, true);
        for (const auto& interval : contactIntervals)
        {
            for (int fixFrame = interval.first; fixFrame < interval.second; ++fixFrame)
            {
                // All we're going to do here is stick the joint to the floor -
                // we're going to allow it to slide from side to side.
// Find a target position that lies on the floor by iteratively // projecting the joint to the floor (pure laziness really, this could // be done analytically): Math::Vector parentPos = Animation::JointLocalToGlobal(joint_parents_vec, parentIdx, ikFixedPoses[fixFrame]).GetTranslation(); Math::Vector target = Animation::JointLocalToGlobal(joint_parents_vec, jointIdx, ikFixedPoses[fixFrame]).GetTranslation(); float jointLength = (target - parentPos).GetLength3(); for(int32_t i = 0; i < 10; ++i) { target.SetY(c.minHeight); auto dir = (target - parentPos).GetNormalized3(); target = parentPos + dir * jointLength; } IK::OneBoneIk( ikFixedPoses[fixFrame], Math::Transform::Identity, jointIdx, 1.0f, target, joint_parents_vec ); jointCorrectionMasks[parentIdx][fixFrame] = 1.0f; } } } // Fixing the contacts with IK will introduce popping into the animation, // so let's apply the interpolator to all the joints we modified so as to // line the trajectory up properly again: for( auto &kv : jointCorrectionMasks) { for( size_t i = 0; i < N; ++i ) { margins[i] = kv.second[i] ? 0.0f : -1.0f; } TrajectoryCorrector corrector(margins, pos_weight * 10, vel_weight, acc_weight); CorrectRotationsForBone( poses, ikFixedPoses, kv.second, corrector, kv.first, false ); } if (has_double_ankle_joints) { // Maps to save target positions BEFORE 2-bone IK modifies them std::map> savedFirstAnkleTargets; // [firstAnkleIdx][frame] -> position std::map> savedToeTargets; // [firstAnkleIdx][frame] -> position std::map contactToToeIdx; // firstAnkleIdx -> toeIdx // Find toe joints for each leg for (const auto& tc : contacts) { if (tc.contactType == Animation::kOneBone) { // The parent of the toe is the 1st ankle int parentIdx = joint_parents_vec[tc.jointIndex]; if (parentIdx >= 0) { contactToToeIdx[parentIdx] = tc.jointIndex; } } } // For each 2-bone contact, correct the parent (2nd ankle) joint for (auto& c : contacts) { if (c.contactType != Animation::kTwoBone) continue; const auto firstAnkleIdx = c.jointIndex; const auto secondAnkleIdx = joint_parents_vec[firstAnkleIdx]; const auto kneeIdx = joint_parents_vec[secondAnkleIdx]; const auto hipIdx = joint_parents_vec[kneeIdx]; if (hipIdx < 0) continue; // safety check // Get saved contact frames for this ankle auto it = twoBoneContactFrames.find(firstAnkleIdx); if (it == twoBoneContactFrames.end()) continue; const auto& contactFrames = it->second; // Add correction mask for knee and hip auto jointMask = fullBodyMask; addEndEffectorMask(firstAnkleIdx, secondAnkleIdx, jointMask); if (jointCorrectionMasks[kneeIdx].empty()) jointCorrectionMasks[kneeIdx] = jointMask; if (jointCorrectionMasks[hipIdx].empty()) jointCorrectionMasks[hipIdx] = jointMask; for (uint32_t fixFrame = 0; fixFrame < fullBodyMask.size(); ++fixFrame) { // Only correct frames where the 1st ankle was corrected if (!contactFrames[fixFrame]) continue; // *** SAVE TARGET POSITIONS BEFORE 2-BONE IK *** savedFirstAnkleTargets[firstAnkleIdx][fixFrame] = Animation::JointLocalToGlobal( joint_parents_vec, firstAnkleIdx, ikFixedPoses[fixFrame]).GetTranslation(); if (contactToToeIdx.count(firstAnkleIdx)) { savedToeTargets[firstAnkleIdx][fixFrame] = Animation::JointLocalToGlobal( joint_parents_vec, contactToToeIdx[firstAnkleIdx], ikFixedPoses[fixFrame]).GetTranslation(); } // Get original global transforms (before any IK corrections) auto originalFirstAnkleGlobal = Animation::JointLocalToGlobal( joint_parents_vec, firstAnkleIdx, originalPoses[fixFrame]); auto originalSecondAnkleGlobal = Animation::JointLocalToGlobal( 
joint_parents_vec, secondAnkleIdx, originalPoses[fixFrame]); // Compute delta from 1st ankle to 2nd ankle in original animation auto deltaFirstToSecond = originalFirstAnkleGlobal.GetDeltaToOther(originalSecondAnkleGlobal); // Get corrected 1st ankle global transform auto correctedFirstAnkleGlobal = Animation::JointLocalToGlobal( joint_parents_vec, firstAnkleIdx, ikFixedPoses[fixFrame]); // Apply the original delta to the corrected 1st ankle to get target for 2nd ankle auto target = (deltaFirstToSecond * correctedFirstAnkleGlobal).GetTranslation(); // print current and target second ankle positions auto currPos = Animation::JointLocalToGlobal( joint_parents_vec, secondAnkleIdx, ikFixedPoses[fixFrame]).GetTranslation(); // Apply 2-bone IK: Hip -> Knee -> 2nd Ankle IK::TwoBoneIk( ikFixedPoses[fixFrame], Math::Transform::Identity, secondAnkleIdx, 1.0f, target, joint_parents_vec, c.hintOffset ); // auto correctedPos = Animation::JointLocalToGlobal( // joint_parents_vec, secondAnkleIdx, ikFixedPoses[fixFrame]).GetTranslation(); // std::cout << "Frame " << fixFrame << ": target second ankle=(" << target.GetX() << ", " << target.GetY() << ", " << target.GetZ() << "), corrected second ankle position=(" << correctedPos.GetX() << ", " << correctedPos.GetY() << ", " << correctedPos.GetZ() << ")" << std::endl; jointCorrectionMasks[kneeIdx][fixFrame] = 1.0f; jointCorrectionMasks[hipIdx][fixFrame] = 1.0f; } } // Smooth the corrected joints for (auto& kv : jointCorrectionMasks) { for (size_t i = 0; i < N; ++i) margins[i] = kv.second[i] ? 0.0f : -1.0f; TrajectoryCorrector corrector(margins, pos_weight * 10, vel_weight, acc_weight); CorrectRotationsForBone(poses, ikFixedPoses, kv.second, corrector, kv.first, false); } // *** PHASE 2: 1-bone IKs to restore 1st ankle and toe *** jointCorrectionMasks.clear(); for (auto& c : contacts) { if (c.contactType != Animation::kTwoBone) continue; const auto firstAnkleIdx = c.jointIndex; const auto secondAnkleIdx = joint_parents_vec[firstAnkleIdx]; auto it = twoBoneContactFrames.find(firstAnkleIdx); if (it == twoBoneContactFrames.end()) continue; // Setup correction masks auto jointMask = fullBodyMask; addEndEffectorMask(firstAnkleIdx, secondAnkleIdx, jointMask); if (jointCorrectionMasks[secondAnkleIdx].empty()) jointCorrectionMasks[secondAnkleIdx] = jointMask; if (jointCorrectionMasks[firstAnkleIdx].empty()) jointCorrectionMasks[firstAnkleIdx] = jointMask; for (uint32_t fixFrame = 0; fixFrame < fullBodyMask.size(); ++fixFrame) { if (!it->second[fixFrame]) continue; // 1-bone IK: Rotate 2nd ankle so 1st ankle reaches saved target IK::OneBoneIk( ikFixedPoses[fixFrame], Math::Transform::Identity, firstAnkleIdx, 1.0f, savedFirstAnkleTargets[firstAnkleIdx][fixFrame], joint_parents_vec ); jointCorrectionMasks[secondAnkleIdx][fixFrame] = 1.0f; // auto target = savedFirstAnkleTargets[firstAnkleIdx][fixFrame]; // auto corrected = Animation::JointLocalToGlobal( // joint_parents_vec, firstAnkleIdx, ikFixedPoses[fixFrame]).GetTranslation(); // std::cout << "Frame " << fixFrame << ": target first ankle=(" << target.GetX() << ", " << target.GetY() << ", " << target.GetZ() << "), corrected first ankle=(" << corrected.GetX() << ", " << corrected.GetY() << ", " << corrected.GetZ() << ")" << std::endl; // 1-bone IK: Rotate 1st ankle so toe reaches saved target if (contactToToeIdx.count(firstAnkleIdx) && savedToeTargets[firstAnkleIdx].count(fixFrame)) { IK::OneBoneIk( ikFixedPoses[fixFrame], Math::Transform::Identity, contactToToeIdx[firstAnkleIdx], 1.0f, 
savedToeTargets[firstAnkleIdx][fixFrame], joint_parents_vec ); jointCorrectionMasks[firstAnkleIdx][fixFrame] = 1.0f; } // target = savedToeTargets[firstAnkleIdx][fixFrame]; // corrected = Animation::JointLocalToGlobal( // joint_parents_vec, contactToToeIdx[firstAnkleIdx], ikFixedPoses[fixFrame]).GetTranslation(); // std::cout << "Frame " << fixFrame << ": target toe=(" << target.GetX() << ", " << target.GetY() << ", " << target.GetZ() << "), corrected toe=(" << corrected.GetX() << ", " << corrected.GetY() << ", " << corrected.GetZ() << ")" << std::endl; } } // Smooth 2nd ankle and 1st ankle for (auto& kv : jointCorrectionMasks) { for (size_t i = 0; i < N; ++i) margins[i] = kv.second[i] ? 0.0f : -1.0f; TrajectoryCorrector corrector(margins, pos_weight * 10, vel_weight, acc_weight); CorrectRotationsForBone(poses, ikFixedPoses, kv.second, corrector, kv.first, false); } } } } Math::Transform Animation::JointLocalToGlobal( const std::vector& joint_parents_vec, int32_t index, const Pose& localPose, const Math::Transform& rootTx) { Math::Transform worldTx = Math::Transform::Identity; while (index > -1) { worldTx = worldTx * localPose[index]; index = joint_parents_vec[index]; } return worldTx * rootTx; } void Animation::CorrectMotion( std::vector& poses, const std::vector& targetPoses, const std::vector& fullBodyMask, const std::vector& rootMask, const std::vector& contacts, const std::vector& endEffectorPins, const std::vector& joint_parents_vec, const std::vector& defaultPose, float contactThreshold, float root_margin, bool has_double_ankle_joints ) { // Calculate some weights so we can preserve velocities more strongly on frames where // the root velocity is low const uint32_t N = poses.size(); Eigen::VectorXd velocity_weights(N); for (uint32_t frame = 1; frame < N; ++frame) { // work out xz velocity for this frame: float xdiff = poses[frame][0].GetTranslation()[0] - poses[frame - 1][0].GetTranslation()[0]; float zdiff = poses[frame][0].GetTranslation()[2] - poses[frame - 1][0].GetTranslation()[2]; // find velocity magnitude, divided by a typical walking speed: float v_mag = sqrtf(xdiff*xdiff + zdiff*zdiff) / 0.05f; // weight lower velocities higher so that the corrector doesn't make the character drift around // when it's supposed to stand still: v_mag = std::max(v_mag, 1.0f/1000.0f); velocity_weights(frame) = 1.0f / v_mag; } velocity_weights[0] = velocity_weights[1]; // Correct root y coordinates. // This will warp the root y coordinates in "poses" so they match the root y coordinates // in "targetPoses", on frames where the root y coordinates are constrained, ie the frames // where fullBodyMask = 1. // In addition to this, it preserves the root y coordinates in "pose" on frames where foot // contacts are active, to avoid mushiness when characters are jumping. CorrectHipsY( poses, targetPoses, fullBodyMask, contacts, contactThreshold ); // Correct root xz coordinates: // This will warp the root xz coordinates in "poses" so they match the xz coordinates // in "targetPoses" on frames where fullBodyMask = 1, and warp them so they're within // "root_margin" units of targetPoses on frames where rootMask = 1. 
CorrectHipsXZ(
        poses,
        targetPoses,
        fullBodyMask,
        rootMask,
        endEffectorPins,
        velocity_weights,
        root_margin
    );

    // Correct joint rotations by warping the rotations so they match targetPoses on frames
    // where fullBodyMask = 1:
    CorrectJointRotations(
        poses,
        targetPoses,
        fullBodyMask,
        velocity_weights
    );

    // Apply IK for end effector pins
    DoEffectorIK(
        poses,
        targetPoses,
        fullBodyMask,
        endEffectorPins,
        joint_parents_vec,
        defaultPose
    );

    // Apply IK to stabilize limbs on contacts
    DoContactIK(
        poses,
        fullBodyMask,
        contacts,
        endEffectorPins,
        joint_parents_vec,
        defaultPose,
        contactThreshold,
        has_double_ankle_joints
    );
}

================================================
FILE: MotionCorrection/src/cpp/AnimProcessing/Utility.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

#include "Math/Transform.h"
#include <cstdint>
#include <vector>

namespace Animation
{
    enum IKType
    {
        kOneBone,
        kTwoBone
    };

    Math::Transform JointLocalToGlobal(
        const std::vector<int32_t>& joint_parents_vec,
        int32_t index,
        const std::vector<Math::Transform>& localPose,
        const Math::Transform& rootTx = Math::Transform::Identity
    );

    struct ContactInfo
    {
        // index of the IK contact joint:
        int jointIndex;
        // mask indicating which frames are in contact:
        std::vector<float> contactMask;
        // contact type:
        IKType contactType = kTwoBone;
        // Extra info for TwoBoneIK
        Math::Vector hintOffset = Math::Vector::Zero;
        float minHeight = 0.0f;
    };

    void CorrectMotion(
        std::vector<std::vector<Math::Transform>>& poses,
        const std::vector<std::vector<Math::Transform>>& targetPoses,
        const std::vector<float>& mask,
        const std::vector<float>& rootMask,
        const std::vector<ContactInfo>& contacts,
        const std::vector<ContactInfo>& endEffectorPins,
        const std::vector<int32_t>& joint_parents_vec,
        const std::vector<Math::Transform>& defaultPose,
        float contactThreshold,
        float root_margin,
        bool has_double_ankle_joints
    );
}
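// ------------------------------------------------------------------
// Illustrative usage sketch (not compiled; not part of the library).
// The masks and empty contact lists below are hypothetical placeholders;
// CorrectMotion edits "poses" in place so it honors the constraints while
// staying close to the original motion:
#if 0
#include "Utility.h"

void ExampleCorrect(
    std::vector<std::vector<Math::Transform>>& poses,         // generated motion
    const std::vector<std::vector<Math::Transform>>& targets, // constraint poses
    const std::vector<int32_t>& parents,
    const std::vector<Math::Transform>& restPose)
{
    const size_t numFrames = poses.size();
    std::vector<float> fullBodyMask(numFrames, 0.0f); // 1 on fully keyframed frames
    std::vector<float> rootMask(numFrames, 0.0f);     // 1 where only the root is pinned
    std::vector<Animation::ContactInfo> contacts;     // e.g. feet, from a contact model
    std::vector<Animation::ContactInfo> pins;         // end effector pins
    Animation::CorrectMotion(poses, targets, fullBodyMask, rootMask,
                             contacts, pins, parents, restPose,
                             /*contactThreshold=*/0.5f, /*root_margin=*/0.05f,
                             /*has_double_ankle_joints=*/false);
}
#endif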
================================================
FILE: MotionCorrection/src/cpp/BindingsPython.cpp
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#include "AnimProcessing/Utility.h"

#ifdef _WIN32
#pragma warning(push)
#pragma warning(disable : 4623 4191 4686 4868 5219 4191 4355)
#endif
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <cmath>
#ifdef _WIN32
#pragma warning(pop)
#endif

#include <algorithm>

namespace py = pybind11;

float strip_nan_inf(float x) noexcept
{
    if (std::isnan(x)) return 0;
    if (std::isinf(x)) return 0;
    return x;
}

void correct_motion(
    py::array_t<float>& rootTranslations,
    py::array_t<float>& jointRotations,
    const py::array_t<float>& rootTranslationsTarget,
    const py::array_t<float>& jointRotationsTarget,
    const py::array_t<float>& fullPoseMask,
    const py::array_t<float>& leftHandMask,
    const py::array_t<float>& rightHandMask,
    const py::array_t<float>& leftFootMask,
    const py::array_t<float>& rightFootMask,
    const py::array_t<float>& rootMask,
    const py::array_t<float>& contacts,
    const py::list& joint_parents,
    const py::list& joint_ref_translations,
    const py::list& joint_ref_rotations,
    int left_hand_idx,
    int right_hand_idx,
    int left_foot_idx,
    int right_foot_idx,
    float contact_threshold,
    float root_margin,
    bool has_double_ankle_joints
)
{
    if (joint_parents.size() != joint_ref_translations.size())
    {
        throw std::runtime_error("correct_motion python bindings: joint_parents and joint_ref_translations must have the same size");
    }
    if (joint_parents.size() != joint_ref_rotations.size())
    {
        throw std::runtime_error("correct_motion python bindings: joint_parents and joint_ref_rotations must have the same size");
    }
    if (left_hand_idx < 0 || right_hand_idx < 0 || left_foot_idx < 0 || right_foot_idx < 0)
    {
        throw std::runtime_error("correct_motion python bindings: left_hand_idx, right_hand_idx, left_foot_idx, and right_foot_idx must be non-negative");
    }
    if (left_hand_idx >= (int)joint_parents.size() || right_hand_idx >= (int)joint_parents.size() ||
        left_foot_idx >= (int)joint_parents.size() || right_foot_idx >= (int)joint_parents.size())
    {
        throw std::runtime_error("correct_motion python bindings: left_hand_idx, right_hand_idx, left_foot_idx, and right_foot_idx must be less than the number of joints");
    }

    std::vector<Math::Transform> defaultPose(joint_parents.size());
    for (size_t i = 0; i < joint_ref_translations.size(); ++i)
    {
        if (!py::isinstance<py::list>(joint_ref_translations[i]))
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_ref_translations to be a list of lists");
        }
        py::list inner_list = joint_ref_translations[i].cast<py::list>();
        if (inner_list.size() != 3)
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_ref_translations to be a list of lists, length 3");
        }
        if (!py::isinstance<py::float_>(inner_list[0]) ||
            !py::isinstance<py::float_>(inner_list[1]) ||
            !py::isinstance<py::float_>(inner_list[2]))
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_ref_translations to be a list of lists, length 3, float values");
        }
        if (!py::isinstance<py::list>(joint_ref_rotations[i]))
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_ref_rotations to be a list of lists");
        }
        py::list inner_list_rot = joint_ref_rotations[i].cast<py::list>();
        if (inner_list_rot.size() != 4)
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_ref_rotations to be a list of lists, length 4");
        }
        if (!py::isinstance<py::float_>(inner_list_rot[0]) ||
            !py::isinstance<py::float_>(inner_list_rot[1]) ||
            !py::isinstance<py::float_>(inner_list_rot[2]) ||
            !py::isinstance<py::float_>(inner_list_rot[3]))
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_ref_rotations to be a list of lists, length 4, float values");
        }
        defaultPose[i].SetTranslation(Math::Vector(
            inner_list[0].cast<float>(),
            inner_list[1].cast<float>(),
            inner_list[2].cast<float>()));
        defaultPose[i].SetRotation(Math::Quaternion(
            inner_list_rot[0].cast<float>(),
            inner_list_rot[1].cast<float>(),
            inner_list_rot[2].cast<float>(),
            inner_list_rot[3].cast<float>()));
    }
    std::vector<int32_t> joint_parents_vec(joint_parents.size());
    for (size_t i = 0; i < joint_parents.size(); ++i)
    {
        if (!py::isinstance<py::int_>(joint_parents[i]))
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_parents to be a list of ints");
        }
        joint_parents_vec[i] = joint_parents[i].cast<int32_t>();
        if (joint_parents_vec[i] >= (int)joint_parents.size())
        {
            throw std::runtime_error("correct_motion python bindings: joint_parents must be a list of ints, and all values must be less than the number of joints");
        }
    }

    size_t num_joints = defaultPose.size();
    size_t gen_length = fullPoseMask.size();
    if (leftHandMask.size() != (int)gen_length || rightHandMask.size() != (int)gen_length ||
        leftFootMask.size() != (int)gen_length || rightFootMask.size() != (int)gen_length ||
        rootMask.size() != (int)gen_length)
    {
        throw std::runtime_error("correct_motion python bindings: all masks must have the same size");
    }
    if (rootTranslations.size() != 3 * (int)gen_length)
    {
        throw std::runtime_error("correct_motion python bindings: rootTranslations has the wrong size");
    }
    if (jointRotations.size() != 4 * (int)num_joints * (int)gen_length)
    {
        throw std::runtime_error("correct_motion python bindings: jointRotations has the wrong size");
    }
    if (rootTranslationsTarget.size() != 3 * (int)gen_length)
    {
        throw std::runtime_error("correct_motion python bindings: rootTranslationsTarget has the wrong size");
    }
    if (jointRotationsTarget.size() != 4 * (int)num_joints * (int)gen_length)
    {
        throw std::runtime_error("correct_motion python bindings: jointRotationsTarget has the wrong size");
    }

    std::vector<Animation::ContactInfo> endEffectorPins(4);
    endEffectorPins[0].jointIndex = left_hand_idx;
    endEffectorPins[0].hintOffset = Math::Vector(0.0f, 0.0f, -0.1f);
    endEffectorPins[1].jointIndex = right_hand_idx;
    endEffectorPins[1].hintOffset = Math::Vector(0.0f, 0.0f, -0.1f);
    endEffectorPins[2].jointIndex = left_foot_idx;
    endEffectorPins[2].hintOffset = Math::Vector(0.0f, 0.0f, 0.1f);
    endEffectorPins[3].jointIndex = right_foot_idx;
    endEffectorPins[3].hintOffset = Math::Vector(0.0f, 0.0f, 0.1f);
    endEffectorPins[0].contactMask.reserve(gen_length);
    endEffectorPins[1].contactMask.reserve(gen_length);
    endEffectorPins[2].contactMask.reserve(gen_length);
    endEffectorPins[3].contactMask.reserve(gen_length);
    for (size_t i = 0; i < gen_length; ++i)
    {
        endEffectorPins[0].contactMask.push_back((1.0f - fullPoseMask.at(i)) * leftHandMask.at(i));
        endEffectorPins[1].contactMask.push_back((1.0f - fullPoseMask.at(i)) * rightHandMask.at(i));
        endEffectorPins[2].contactMask.push_back((1.0f - fullPoseMask.at(i)) * leftFootMask.at(i));
        endEffectorPins[3].contactMask.push_back((1.0f - fullPoseMask.at(i)) * rightFootMask.at(i));
    }
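    // Worked example (comments only): the pin masks above are gated by the
    // full pose mask, e.g. with fullPoseMask = [1, 0, 0] and
    // leftHandMask = [1, 1, 0], the left hand pin mask becomes
    // (1 - fullPoseMask) * leftHandMask = [0, 1, 0]: a frame that is already
    // fully keyframed doesn't also get an end effector pin.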
    std::vector<Animation::ContactInfo> contactInfo(2);
    auto footTranslation = Animation::JointLocalToGlobal(joint_parents_vec, right_foot_idx, defaultPose).GetTranslation();
    contactInfo[0].jointIndex = right_foot_idx;
    contactInfo[0].hintOffset = Math::Vector(0.0f, 0.0f, 0.1f);
    contactInfo[0].minHeight = footTranslation.GetY();
    footTranslation = Animation::JointLocalToGlobal(joint_parents_vec, left_foot_idx, defaultPose).GetTranslation();
    contactInfo[1].jointIndex = left_foot_idx;
    contactInfo[1].hintOffset = Math::Vector(0.0f, 0.0f, 0.1f);
    contactInfo[1].minHeight = footTranslation.GetY();
    auto& rContacts = contactInfo[0].contactMask;
    auto& lContacts = contactInfo[1].contactMask;
    rContacts.resize(fullPoseMask.size());
    lContacts.resize(fullPoseMask.size());
    for (int i = 0; i < fullPoseMask.size(); ++i)
    {
        // don't flag it as a contact if it's been masked:
        rContacts[i] = rightFootMask.at(i) ? 0.0f : contacts.at(4 * i + 2);
        lContacts[i] = leftFootMask.at(i) ? 0.0f : contacts.at(4 * i + 0);
        // Flag the heel as a contact if the toe is a contact:
        rContacts[i] = std::min((rightFootMask.at(i) ? 0.0f : contacts.at(4 * i + 3)) + rContacts[i], 1.0f);
        lContacts[i] = std::min((leftFootMask.at(i) ? 0.0f : contacts.at(4 * i + 1)) + lContacts[i], 1.0f);
    }

    int left_toe_idx = -1;
    int right_toe_idx = -1;
    for (int i = 0; i < (int)num_joints; ++i)
    {
        if (joint_parents_vec[i] == left_foot_idx) { left_toe_idx = i; }
        if (joint_parents_vec[i] == right_foot_idx) { right_toe_idx = i; }
    }
    if (left_toe_idx != -1 && right_toe_idx != -1)
    {
        auto toeTranslation = Animation::JointLocalToGlobal(joint_parents_vec, right_toe_idx, defaultPose).GetTranslation();
        contactInfo.resize(4);
        contactInfo[2].jointIndex = right_toe_idx;
        contactInfo[2].contactType = Animation::kOneBone;
        contactInfo[2].minHeight = toeTranslation.GetY();
        contactInfo[3].jointIndex = left_toe_idx;
        contactInfo[3].contactType = Animation::kOneBone;
        contactInfo[3].minHeight = toeTranslation.GetY();
        auto& rToeContacts = contactInfo[2].contactMask;
        auto& lToeContacts = contactInfo[3].contactMask;
        // fill up the toe contacts:
        rToeContacts.resize(fullPoseMask.size());
        lToeContacts.resize(fullPoseMask.size());
        for (int i = 0; i < fullPoseMask.size(); ++i)
        {
            // don't flag it as a contact if it's been masked:
            rToeContacts[i] = rightFootMask.at(i) ? 0.0f : contacts.at(4 * i + 3);
            lToeContacts[i] = leftFootMask.at(i) ? 0.0f : contacts.at(4 * i + 1);
        }
    }

    auto setTransforms = [gen_length, num_joints](
        std::vector<std::vector<Math::Transform>>& poses,
        const py::array_t<float>& rootTranslations,
        const py::array_t<float>& jointRotations
    )
    {
        for (size_t f = 0; f < gen_length; ++f)
        {
            poses[f][0].SetTranslation({
                strip_nan_inf(rootTranslations.at(3 * f + 0)),
                strip_nan_inf(rootTranslations.at(3 * f + 1)),
                strip_nan_inf(rootTranslations.at(3 * f + 2)) });
        }
        for (size_t f = 0; f < gen_length; ++f)
        {
            for (size_t j = 0; j < num_joints; ++j)
            {
                // incoming layout is w x y z; the constructor takes x y z w:
                Math::Quaternion q(
                    strip_nan_inf(jointRotations.at(4 * (num_joints * f + j) + 1)),
                    strip_nan_inf(jointRotations.at(4 * (num_joints * f + j) + 2)),
                    strip_nan_inf(jointRotations.at(4 * (num_joints * f + j) + 3)),
                    strip_nan_inf(jointRotations.at(4 * (num_joints * f + j) + 0)));
                q.Normalize();
                poses[f][j].SetRotation(q);
            }
        }
    };

    std::vector<std::vector<Math::Transform>> posesFixed(gen_length, defaultPose);
    setTransforms(posesFixed, rootTranslations, jointRotations);
    std::vector<std::vector<Math::Transform>> posesTarget(gen_length, defaultPose);
    setTransforms(posesTarget, rootTranslationsTarget, jointRotationsTarget);

    std::vector<float> fullPoseMask_vec;
    std::vector<float> rootMask_vec;
    for (size_t f = 0; f < gen_length; ++f)
    {
        fullPoseMask_vec.push_back(fullPoseMask.at(f));
        rootMask_vec.push_back(rootMask.at(f));
    }

    Animation::CorrectMotion(
        posesFixed,
        posesTarget,
        fullPoseMask_vec,
        rootMask_vec,
        contactInfo,
        endEffectorPins,
        joint_parents_vec,
        defaultPose,
        contact_threshold,
        root_margin,
        has_double_ankle_joints
    );
================================================
FILE: MotionCorrection/src/cpp/Compiler.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

// Compiler specific defines
// Finds the compiler type and version.
#if defined(__clang__)
# define COMPILER_CLANG
#elif defined(__GNUC__) // Check after Clang, as Clang defines this too
# define COMPILER_GNUC
#elif defined(_MSC_VER) // Check after Clang, since we could be building with either within VS
# define COMPILER_MSVC
#else
# error "Unknown compiler."
#endif

#if defined(COMPILER_MSVC)
#define FORCE_INLINE __forceinline
#elif defined(COMPILER_GNUC) || defined(COMPILER_CLANG)
#define FORCE_INLINE inline __attribute__((always_inline))
#endif

================================================
FILE: MotionCorrection/src/cpp/Debug.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

#include "Platform.h"

#define ASSERT( cond ) do { if( !(cond) ) { DEBUG_BREAK(); } } while( 0 )
#define HALT() { DEBUG_BREAK(); }
#define UNIMPLEMENTED_FUNCTION() { DEBUG_BREAK(); }
#define UNREACHABLE_CODE() { DEBUG_BREAK(); }

================================================
FILE: MotionCorrection/src/cpp/Math/Constants.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

#include <limits>

// Mathematical constants
namespace Math
{
    static constexpr float const Epsilon = 1.0e-06f;
    static constexpr float const LargeEpsilon = 1.0e-04f;
    static constexpr float const HugeEpsilon = 1.0e-02f;
    static constexpr float const Pi = 3.141592654f;
    static constexpr float const TwoPi = 6.283185307f;
    static constexpr float const OneDivPi = 0.318309886f;
    static constexpr float const OneDivTwoPi = 0.159154943f;
    static constexpr float const PiDivTwo = 1.570796327f;
    static constexpr float const PiDivFour = 0.785398163f;
    static constexpr float const SqrtTwo = 1.4142135623730950488016887242097f;
    static constexpr float const OneDivSqrtTwo = 1.0f / SqrtTwo;
    static constexpr float const DegreesToRadians = 0.0174532925f;
    static constexpr float const RadiansToDegrees = 57.2957795f;
    static constexpr float const Infinity = std::numeric_limits<float>::infinity();
    static constexpr float const QNaN = std::numeric_limits<float>::quiet_NaN();
}
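For orientation, a minimal sketch of how these three headers compose: FORCE_INLINE comes from Compiler.h, ASSERT from Debug.h, and the conversion constant from Constants.h. The helper below is hypothetical, not repository code:

```cpp
#include "Compiler.h"
#include "Debug.h"
#include "Math/Constants.h"

// Hypothetical helper, for illustration only: converts degrees to radians
// with the constant from Constants.h, guarding the input with ASSERT.
FORCE_INLINE float DegToRad(float degrees) {
    ASSERT(degrees >= -360.0f && degrees <= 360.0f); // illustrative precondition
    return degrees * Math::DegreesToRadians;
}
```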
================================================
FILE: MotionCorrection/src/cpp/Math/Matrix.cpp
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#include "Matrix.h"

#include <cfloat>

using namespace Math;

namespace {
    static bool CheckForZeroScaleInRow(float scale, const Vector& row) {
        float const absScale = Math::Abs(scale);
        for (int i = 0; i < 3; i++) {
            if (absScale < 1 && Math::Abs(row[i]) >= FLT_MAX * absScale) {
                return false;
            }
        }
        return true;
    }

    static bool ExtractAndRemoveScalingAndShear(Matrix& matrix, Vector& scale, Vector& shear) {
        scale = Vector::Zero;
        shear = Vector::Zero;
        Float3 scaleValues = Float3::Zero;
        Float3 shearValues = Float3::Zero;

        // This implementation follows the technique described in the paper by
        // Spencer W. Thomas in the Graphics Gems II article: "Decomposing a
        // Matrix into Simple Transformations", p. 320.
        Vector row[3];
        row[0] = Vector(matrix[0][0], matrix[0][1], matrix[0][2]);
        row[1] = Vector(matrix[1][0], matrix[1][1], matrix[1][2]);
        row[2] = Vector(matrix[2][0], matrix[2][1], matrix[2][2]);

        float maxVal = 0;
        for (int i = 0; i < 3; i++) {
            for (int j = 0; j < 3; j++) {
                if (Math::Abs(row[i][j]) > maxVal) {
                    maxVal = Math::Abs(row[i][j]);
                }
            }
        }

        // We normalize the 3x3 matrix here.
        // It was noticed that this can improve numerical stability significantly,
        // especially when many of the upper 3x3 matrix's coefficients are very
        // close to zero; we correct for this step at the end by multiplying the
        // scaling factors by maxVal at the end (shear and rotation are not
        // affected by the normalization).
        if (maxVal != 0) {
            for (int i = 0; i < 3; i++) {
                if (!CheckForZeroScaleInRow(maxVal, row[i])) {
                    return false;
                } else {
                    row[i] /= maxVal;
                }
            }
        }

        // Compute X scale factor.
        scaleValues.m_x = row[0].Length3().ToFloat();
        if (!CheckForZeroScaleInRow(scaleValues.m_x, row[0])) {
            return false;
        }

        // Normalize first row.
        row[0] /= scaleValues.m_x;

        // An XY shear factor will shear the X coord. as the Y coord. changes.
        // There are 6 combinations (XY, XZ, YZ, YX, ZX, ZY), although we only
        // extract the first 3 because we can effect the last 3 by shearing in
        // XY, XZ, YZ combined rotations and scales.
        //
        // shear matrix <  1, YX, ZX, 0,
        //                XY,  1, ZY, 0,
        //                XZ, YZ,  1, 0,
        //                 0,  0,  0, 1 >

        // Compute XY shear factor and make 2nd row orthogonal to 1st.
        shearValues[0] = Vector::Dot3(row[0], row[1]).ToFloat();
        row[1] -= row[0] * shearValues[0];

        // Now, compute Y scale.
        scaleValues.m_y = row[1].Length3().ToFloat();
        if (!CheckForZeroScaleInRow(scaleValues.m_y, row[1])) {
            return false;
        }

        // Normalize 2nd row and correct the XY shear factor for Y scaling.
        row[1] /= scaleValues.m_y;
        shearValues[0] /= scaleValues.m_y;

        // Compute XZ and YZ shears, orthogonalize 3rd row.
        shearValues[1] = Vector::Dot3(row[0], row[2]).ToFloat();
        row[2] -= row[0] * shearValues[1];
        shearValues[2] = Vector::Dot3(row[1], row[2]).ToFloat();
        row[2] -= row[1] * shearValues[2];

        // Next, get Z scale.
        scaleValues.m_z = row[2].Length3().ToFloat();
        if (!CheckForZeroScaleInRow(scaleValues.m_z, row[2])) {
            return false;
        }

        // Normalize 3rd row and correct the XZ and YZ shear factors for Z scaling.
        row[2] /= scaleValues.m_z;
        shearValues[1] /= scaleValues.m_z;
        shearValues[2] /= scaleValues.m_z;

        // At this point, the upper 3x3 matrix in mat is orthonormal.
        // Check for a coordinate system flip. If the determinant
        // is less than zero, then negate the matrix and the scaling factors.
        if (Vector::Dot3(row[0], Vector::Cross3(row[1], row[2])).ToFloat() < 0) {
            for (int i = 0; i < 3; i++) {
                scaleValues[i] *= -1;
                row[i] *= -1;
            }
        }

        // Copy over the orthonormal rows into the returned matrix.
        // The upper 3x3 matrix in mat is now a rotation matrix.
for (int i = 0; i < 3; i++) { matrix[i].SetX(row[i][0]); matrix[i].SetY(row[i][1]); matrix[i].SetZ(row[i][2]); } // Correct the scaling factors for the normalization step that we // performed above; shear and rotation are not affected by the // normalization. scaleValues *= maxVal; scale = Vector(scaleValues); shear = Vector(shearValues); return true; } } namespace Math { Matrix const Matrix::Identity(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1); Matrix::Matrix(float v00, float v01, float v02, float v03, float v10, float v11, float v12, float v13, float v20, float v21, float v22, float v23, float v30, float v31, float v32, float v33) { m_rows[0] = Vector(v00, v01, v02, v03); m_rows[1] = Vector(v10, v11, v12, v13); m_rows[2] = Vector(v20, v21, v22, v23); m_rows[3] = Vector(v30, v31, v32, v33); } Matrix::Matrix(float values[16]) { m_rows[0] = Vector(values[0], values[1], values[2], values[3]); m_rows[1] = Vector(values[4], values[5], values[6], values[7]); m_rows[2] = Vector(values[8], values[9], values[10], values[11]); m_rows[3] = Vector(values[12], values[13], values[14], values[15]); } Matrix::Matrix(const Vector& xAxis, const Vector& yAxis, const Vector& zAxis) { m_rows[0] = xAxis; m_rows[1] = yAxis; m_rows[2] = zAxis; m_rows[3] = Vector::UnitW; } Matrix::Matrix(const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector& translation) { m_rows[0] = xAxis; m_rows[1] = yAxis; m_rows[2] = zAxis; m_rows[3] = translation.GetWithW1(); } Matrix::Matrix(const EulerAngles& eulerAngles, const Vector translation) { float cx, cy, cz, sx, sy, sz, czsx, cxcz, sysz; sx = sinf((float)eulerAngles.m_x); cx = cosf((float)eulerAngles.m_x); sy = sinf((float)eulerAngles.m_y); cy = cosf((float)eulerAngles.m_y); sz = sinf((float)eulerAngles.m_z); cz = cosf((float)eulerAngles.m_z); czsx = cz * sx; cxcz = cx * cz; sysz = sy * sz; // Order is XYZ m_values[0][0] = cy * cz; m_values[0][1] = cy * sz; m_values[0][2] = -sy; m_values[1][0] = czsx * sy - cx * sz; m_values[1][1] = cxcz + sx * sysz; m_values[1][2] = cy * sx; m_values[2][0] = cxcz * sy + sx * sz; m_values[2][1] = -czsx + cx * sysz; m_values[2][2] = cx * cy; m_values[0][3] = 0.0f; m_values[1][3] = 0.0f; m_values[2][3] = 0.0f; // Translation m_rows[3] = translation.GetWithW1(); } EulerAngles Matrix::ToEulerAngles() const { EulerAngles result; result.m_x = Radians(Math::ATan2(m_values[1][2], m_values[2][2])); float const c2 = Math::Sqrt((m_values[0][0] * m_values[0][0]) + (m_values[0][1] * m_values[0][1])); result.m_y = Radians(Math::ATan2(-m_values[0][2], c2)); float const s1 = Math::Sin((float)result.m_x); float const c1 = Math::Cos((float)result.m_x); result.m_z = Radians(Math::ATan2((s1 * m_values[2][0]) - (c1 * m_values[1][0]), (c1 * m_values[1][1]) - (s1 * m_values[2][1]))); return result; } bool Matrix::Decompose(Quaternion& outRotation, Vector& outTranslation, Vector& outScale) const { Matrix copy = *this; Vector shr = Vector::Zero; outScale = Vector::Zero; // Extract and remove scale and shear from matrix if (ExtractAndRemoveScalingAndShear(copy, outScale, shr)) { // Extract rotation and translation from unscaled matrix outRotation = copy.GetRotation(); outTranslation = copy.GetTranslation().GetWithW0(); return true; } return false; } Vector Matrix::GetScale() const { Matrix copy = *this; Vector scale = Vector::Zero, shear; if (!ExtractAndRemoveScalingAndShear(copy, scale, shear)) { float const lengthX = m_rows[0].Length3().ToFloat(); float const lengthY = m_rows[1].Length3().ToFloat(); float const lengthZ = 
m_rows[2].Length3().ToFloat(); scale = Vector(lengthX, lengthY, lengthZ, 0.0f); } return scale; } Matrix& Matrix::SetScale(const Vector& newScale) { Vector scale, shear; bool result = ExtractAndRemoveScalingAndShear(*this, scale, shear); // Cannot set scale on matrix that contains zero-scale ASSERT(result); m_rows[0] = m_rows[0] * newScale.GetSplatX(); m_rows[1] = m_rows[1] * newScale.GetSplatY(); m_rows[2] = m_rows[2] * newScale.GetSplatZ(); return *this; } Matrix& Matrix::RemoveScale() { Vector scale, shear; bool result = ExtractAndRemoveScalingAndShear(*this, scale, shear); // Cannot remove zero scale from matrix ASSERT(result); return *this; } } ================================================ FILE: MotionCorrection/src/cpp/Math/Matrix.h ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Vector.h" #include "Quaternion.h" enum class CoordinateSpace : uint8_t { World, Local, }; // // Matrices are Row-Major // Multiplication order is right to left // ObjectWorldTransform = LocalObjectTransform * WorldTransform // namespace Math { class alignas(16) Matrix { public: static Matrix const Identity; public: static Matrix FromRotation(const Quaternion& rotation); static Matrix FromTranslation(const Vector& translation); static Matrix FromScale(const Vector& scale); static Matrix FromUniformScale(float uniformScale); static Matrix FromTranslationAndScale(const Vector& translation, const Vector& scale); static Matrix FromRotationBetweenVectors(const Vector sourceVector, const Vector targetVector); public: explicit Matrix(); explicit Matrix(NoInit_t); explicit Matrix(ZeroInit_t); explicit Matrix(float v00, float v01, float v02, float v03, float v10, float v11, float v12, float v13, float v20, float v21, float v22, float v23, float v30, float v31, float v32, float v33); explicit Matrix(float values[16]); explicit Matrix(Vector const& xAxis, Vector const& yAxis, Vector const& zAxis); explicit Matrix(Vector const& xAxis, Vector const& yAxis, Vector const& zAxis, Vector const& translation); Matrix(const Vector axis, Radians angleRadians); Matrix(const AxisAngle axisAngle); explicit Matrix(const Quaternion& rotation); explicit Matrix(const Quaternion& rotation, const Vector& translation, const Vector& scale = Vector::One); explicit Matrix(const Quaternion& rotation, const Vector& translation, float scale = 1.0f); explicit Matrix(const EulerAngles& eulerAngles, const Vector translation = Vector::UnitW); EulerAngles ToEulerAngles() const; float* AsFloatArray(); const float* AsFloatArray() const; const Vector& GetRow(uint32_t row) const; const Vector& GetAxisX() const; const Vector& GetAxisY() const; const Vector& GetAxisZ() const; void SetAxisX(const Vector& xAxis); void SetAxisY(const Vector& yAxis); void SetAxisZ(const Vector& zAxis); Float3 GetForwardVector() const; Float3 GetRightVector() const; Float3 GetUpVector() const; Vector GetUnitAxisX() const; Vector GetUnitAxisY() const; Vector GetUnitAxisZ() const; bool IsIdentity() const; bool IsOrthogonal() const; bool IsOrthonormal() const; bool Decompose(Quaternion& outRotation, Vector& outTranslation, Vector& outScale) const; Matrix& Transpose(); Matrix GetTransposed() const; Matrix& Invert(); Matrix GetInverse() const; Vector GetDeterminant() const; float GetDeterminantAsFloat() const; Vector GetTranslation() const; const Vector& GetTranslationWithW() const; Matrix& SetTranslation(Vector 
const& v); Matrix& SetTranslation(Float3 const& v); Matrix& SetTranslation(Float4 const& v); Quaternion GetRotation() const; Matrix& SetRotation(const Matrix& rotation); Matrix& SetRotation(const Quaternion& rotation); Matrix& SetRotationMaintainingScale(const Matrix& rotation); Matrix& SetRotationMaintainingScale(const Quaternion& rotation); Vector GetScale() const; Matrix& RemoveScale(); Matrix& SetScale(const Vector& scale); Matrix& SetScale(float uniformScale); Matrix& RemoveScaleFast(); Matrix& SetScaleFast(const Vector& scale); Matrix& SetScaleFast(float uniformScale); // // Operators // // Applies rotation and scale to a vector and returns a result with the W = 0 Vector RotateVector(const Vector& vector) const; // Applies rotation and scale to a vector and returns a result with the W = 0 Vector TransformNormal(const Vector& vector) const; // Applies the transformation to a given point and ensures the resulting W = 1 Vector TransformPoint(const Vector& point) const; // Applies the transformation to a vector ignoring the W value. // Same as TransformPoint with the result W left unchanged Vector TransformVector3(const Vector& vector) const; // Applies the transformation to a given vector with the result W left unchanged Vector TransformVector4(const Vector& vector) const; Vector& operator[](uint32_t i); const Vector operator[](uint32_t i) const; Matrix operator*(const Matrix& rhs) const; Matrix& operator*=(const Matrix& rhs); Matrix operator*(const Quaternion& rhs) const; Matrix operator*=(const Quaternion& rhs); bool operator==(const Matrix& rhs) const; public: union { Vector m_rows[4]; float m_values[4][4]; }; }; } #include "Matrix.inl" ================================================ FILE: MotionCorrection/src/cpp/Math/Matrix.inl ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

#include <cstring>

#include "Matrix.h"

namespace Math
{

inline Matrix Matrix::FromRotation(const Quaternion& rotation) { return Matrix(rotation); }

inline Matrix Matrix::FromTranslation(const Vector& translation)
{
    Matrix M;
    M.m_rows[0] = Vector::UnitX;
    M.m_rows[1] = Vector::UnitY;
    M.m_rows[2] = Vector::UnitZ;
    M.m_rows[3] = translation.GetWithW1();
    return M;
}

inline Matrix Matrix::FromScale(const Vector& scale)
{
    Matrix M;
    M.m_rows[0] = _mm_and_ps(scale, SIMD::g_maskX000);
    M.m_rows[1] = _mm_and_ps(scale, SIMD::g_mask0Y00);
    M.m_rows[2] = _mm_and_ps(scale, SIMD::g_mask00Z0);
    M.m_rows[3] = Vector::UnitW;
    return M;
}

inline Matrix Matrix::FromUniformScale(float uniformScale)
{
    Matrix M;
    M.m_rows[0] = _mm_set_ps(0, 0, 0, uniformScale);
    M.m_rows[1] = _mm_set_ps(0, 0, uniformScale, 0);
    M.m_rows[2] = _mm_set_ps(0, uniformScale, 0, 0);
    M.m_rows[3] = Vector::UnitW;
    return M;
}

inline Matrix Matrix::FromTranslationAndScale(const Vector& translation, const Vector& scale)
{
    Matrix M;
    M.m_rows[0] = _mm_and_ps(scale, SIMD::g_maskX000);
    M.m_rows[1] = _mm_and_ps(scale, SIMD::g_mask0Y00);
    M.m_rows[2] = _mm_and_ps(scale, SIMD::g_mask00Z0);
    M.m_rows[3] = translation.GetWithW1();
    return M;
}

inline Matrix Matrix::FromRotationBetweenVectors(Vector const sourceVector, Vector const targetVector)
{
    return Matrix(Quaternion::FromRotationBetweenNormalizedVectors(sourceVector, targetVector));
}

inline Matrix::Matrix() { memcpy(this, &Matrix::Identity, sizeof(Matrix)); }

inline Matrix::Matrix(NoInit_t) { }

inline Matrix::Matrix(ZeroInit_t) { memset(this, 0, sizeof(Matrix)); }

inline Matrix::Matrix(const Vector axis, Radians angleRadians)
{
    Vector normal = axis.GetNormalized3();
    Vector C0, C1;
    Vector::SinCos(C0, C1, Vector((float)angleRadians));
    Vector C2 = Vector::One - C1;
    __m128 N0 = _mm_shuffle_ps(normal, normal, _MM_SHUFFLE(3, 0, 2, 1));
    __m128 N1 = _mm_shuffle_ps(normal, normal, _MM_SHUFFLE(3, 1, 0, 2));
    __m128 V0 = _mm_mul_ps(C2, N0);
    V0 = _mm_mul_ps(V0, N1);
    __m128 R0 = _mm_mul_ps(C2, normal);
    R0 = _mm_mul_ps(R0, normal);
    R0 = _mm_add_ps(R0, C1);
    __m128 R1 = _mm_mul_ps(C0, normal);
    R1 = _mm_add_ps(R1, V0);
    __m128 R2 = _mm_mul_ps(C0, normal);
    R2 = _mm_sub_ps(V0, R2);
    V0 = _mm_and_ps(R0, SIMD::g_maskXYZ0);
    __m128 V1 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(2, 1, 2, 0));
    V1 = _mm_shuffle_ps(V1, V1, _MM_SHUFFLE(0, 3, 2, 1));
    __m128 V2 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(0, 0, 1, 1));
    V2 = _mm_shuffle_ps(V2, V2, _MM_SHUFFLE(2, 0, 2, 0));
    R2 = _mm_shuffle_ps(V0, V1, _MM_SHUFFLE(1, 0, 3, 0));
    R2 = _mm_shuffle_ps(R2, R2, _MM_SHUFFLE(1, 3, 2, 0));
    m_rows[0] = R2;
    R2 = _mm_shuffle_ps(V0, V1, _MM_SHUFFLE(3, 2, 3, 1));
    R2 = _mm_shuffle_ps(R2, R2, _MM_SHUFFLE(1, 3, 0, 2));
    m_rows[1] = R2;
    V2 = _mm_shuffle_ps(V2, V0, _MM_SHUFFLE(3, 2, 1, 0));
    m_rows[2] = V2;
    m_rows[3] = Vector::UnitW;
}

inline Matrix::Matrix(const AxisAngle axisAngle) : Matrix(Vector(axisAngle.m_axis), axisAngle.m_angle) { }

inline Matrix::Matrix(const Quaternion& rotation)
{
    SetRotation(rotation);
    m_rows[3] = Vector::UnitW;
}

inline Matrix::Matrix(const Quaternion& rotation, const Vector& translation, const Vector& scale)
{
    SetRotation(rotation);
    m_rows[0] = m_rows[0] * scale.GetSplatX();
    m_rows[1] = m_rows[1] * scale.GetSplatY();
    m_rows[2] = m_rows[2] * scale.GetSplatZ();
    m_rows[3] = translation.GetWithW1();
}

inline Matrix::Matrix(const Quaternion& rotation, const Vector& translation, float scale) : Matrix(rotation, translation, Vector(scale)) { }

inline float* Matrix::AsFloatArray() { return &m_values[0][0]; }
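As a usage sketch of the constructors above together with Matrix::Decompose from Matrix.cpp (illustrative only; it assumes Radians and Vector construct from floats, as their use elsewhere in these headers suggests):

```cpp
#include "Math/Matrix.h"

// Illustrative round trip, not repository code: compose a rigid transform
// from a quaternion and a translation, then recover the pieces.
void DecomposeRoundTrip() {
    Math::Quaternion const rot(Math::Vector::UnitY, Math::Radians(Math::PiDivTwo));
    Math::Matrix const m(rot, Math::Vector(1.0f, 2.0f, 3.0f, 0.0f), 1.0f);

    Math::Quaternion outRot;
    Math::Vector outTranslation, outScale;
    if (m.Decompose(outRot, outTranslation, outScale)) {
        // outTranslation is ~(1, 2, 3) and outScale is ~(1, 1, 1);
        // outRot matches rot up to sign, since q and -q encode the same rotation.
    }
}
```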
inline const float* Matrix::AsFloatArray() const { return &m_values[0][0]; } inline const Vector& Matrix::GetRow(uint32_t row) const { return m_rows[row]; } inline const Vector& Matrix::GetAxisX() const { return m_rows[0]; } inline const Vector& Matrix::GetAxisY() const { return m_rows[1]; } inline const Vector& Matrix::GetAxisZ() const { return m_rows[2]; } inline void Matrix::SetAxisX(const Vector& xAxis) { m_rows[0] = xAxis; } inline void Matrix::SetAxisY(const Vector& yAxis) { m_rows[1] = yAxis; } inline void Matrix::SetAxisZ(const Vector& zAxis) { m_rows[2] = zAxis; } inline Float3 Matrix::GetForwardVector() const { return GetAxisZ(); } inline Float3 Matrix::GetRightVector() const { return GetAxisX(); } inline Float3 Matrix::GetUpVector() const { return GetAxisY(); } inline Vector Matrix::GetUnitAxisX() const { return m_rows[0].GetNormalized3(); } inline Vector Matrix::GetUnitAxisY() const { return m_rows[1].GetNormalized3(); } inline Vector Matrix::GetUnitAxisZ() const { return m_rows[2].GetNormalized3(); } inline bool Matrix::IsIdentity() const { __m128 vTemp1 = _mm_cmpeq_ps(m_rows[0], Vector::UnitX); __m128 vTemp2 = _mm_cmpeq_ps(m_rows[1], Vector::UnitY); __m128 vTemp3 = _mm_cmpeq_ps(m_rows[2], Vector::UnitZ); __m128 vTemp4 = _mm_cmpeq_ps(m_rows[3], Vector::UnitW); vTemp1 = _mm_and_ps(vTemp1, vTemp2); vTemp3 = _mm_and_ps(vTemp3, vTemp4); vTemp1 = _mm_and_ps(vTemp1, vTemp3); return (_mm_movemask_ps(vTemp1) == 0x0f); } inline bool Matrix::IsOrthogonal() const { Matrix const transpose = GetTransposed(); Matrix result = *this * transpose; return result.IsIdentity(); } inline bool Matrix::IsOrthonormal() const { static const Vector three(3); auto dotCheck = Vector::Dot3(m_rows[0], m_rows[1]) + Vector::Dot3(m_rows[0], m_rows[2]) + Vector::Dot3(m_rows[1], m_rows[2]); auto magnitudeCheck = m_rows[0].LengthSquared3() + m_rows[1].LengthSquared3() + m_rows[2].LengthSquared3(); auto result = dotCheck + magnitudeCheck; return result.IsNearEqual3(three); } inline Matrix& Matrix::Transpose() { __m128 vTemp1 = _mm_shuffle_ps(m_rows[0], m_rows[1], _MM_SHUFFLE(1, 0, 1, 0)); __m128 vTemp3 = _mm_shuffle_ps(m_rows[0], m_rows[1], _MM_SHUFFLE(3, 2, 3, 2)); __m128 vTemp2 = _mm_shuffle_ps(m_rows[2], m_rows[3], _MM_SHUFFLE(1, 0, 1, 0)); __m128 vTemp4 = _mm_shuffle_ps(m_rows[2], m_rows[3], _MM_SHUFFLE(3, 2, 3, 2)); m_rows[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); m_rows[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); m_rows[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); m_rows[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); return *this; } inline Matrix Matrix::GetTransposed() const { Matrix m = *this; m.Transpose(); return m; } inline Matrix& Matrix::Invert() { Matrix MT = GetTransposed(); __m128 V00 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(1, 1, 0, 0)); __m128 V10 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(3, 2, 3, 2)); __m128 V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(1, 1, 0, 0)); __m128 V11 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(3, 2, 3, 2)); __m128 V02 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[0], _MM_SHUFFLE(2, 0, 2, 0)); __m128 V12 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[1], _MM_SHUFFLE(3, 1, 3, 1)); __m128 D0 = _mm_mul_ps(V00, V10); __m128 D1 = _mm_mul_ps(V01, V11); __m128 D2 = _mm_mul_ps(V02, V12); V00 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(3, 2, 3, 2)); V10 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(1, 1, 0, 0)); V01 = 
_mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(3, 2, 3, 2)); V11 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(1, 1, 0, 0)); V02 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[0], _MM_SHUFFLE(3, 1, 3, 1)); V12 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[1], _MM_SHUFFLE(2, 0, 2, 0)); V00 = _mm_mul_ps(V00, V10); V01 = _mm_mul_ps(V01, V11); V02 = _mm_mul_ps(V02, V12); D0 = _mm_sub_ps(D0, V00); D1 = _mm_sub_ps(D1, V01); D2 = _mm_sub_ps(D2, V02); // V11 = D0Y,D0W,D2Y,D2Y V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 1, 3, 1)); V00 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(1, 0, 2, 1)); V10 = _mm_shuffle_ps(V11, D0, _MM_SHUFFLE(0, 3, 0, 2)); V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(0, 1, 0, 2)); V11 = _mm_shuffle_ps(V11, D0, _MM_SHUFFLE(2, 1, 2, 1)); // V13 = D1Y,D1W,D2W,D2W __m128 V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 3, 3, 1)); V02 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(1, 0, 2, 1)); V12 = _mm_shuffle_ps(V13, D1, _MM_SHUFFLE(0, 3, 0, 2)); __m128 V03 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(0, 1, 0, 2)); V13 = _mm_shuffle_ps(V13, D1, _MM_SHUFFLE(2, 1, 2, 1)); __m128 C0 = _mm_mul_ps(V00, V10); __m128 C2 = _mm_mul_ps(V01, V11); __m128 C4 = _mm_mul_ps(V02, V12); __m128 C6 = _mm_mul_ps(V03, V13); // V11 = D0X,D0Y,D2X,D2X V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(0, 0, 1, 0)); V00 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(2, 1, 3, 2)); V10 = _mm_shuffle_ps(D0, V11, _MM_SHUFFLE(2, 1, 0, 3)); V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(1, 3, 2, 3)); V11 = _mm_shuffle_ps(D0, V11, _MM_SHUFFLE(0, 2, 1, 2)); // V13 = D1X,D1Y,D2Z,D2Z V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(2, 2, 1, 0)); V02 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(2, 1, 3, 2)); V12 = _mm_shuffle_ps(D1, V13, _MM_SHUFFLE(2, 1, 0, 3)); V03 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(1, 3, 2, 3)); V13 = _mm_shuffle_ps(D1, V13, _MM_SHUFFLE(0, 2, 1, 2)); V00 = _mm_mul_ps(V00, V10); V01 = _mm_mul_ps(V01, V11); V02 = _mm_mul_ps(V02, V12); V03 = _mm_mul_ps(V03, V13); C0 = _mm_sub_ps(C0, V00); C2 = _mm_sub_ps(C2, V01); C4 = _mm_sub_ps(C4, V02); C6 = _mm_sub_ps(C6, V03); V00 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(0, 3, 0, 3)); // V10 = D0Z,D0Z,D2X,D2Y V10 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 0, 2, 2)); V10 = _mm_shuffle_ps(V10, V10, _MM_SHUFFLE(0, 2, 3, 0)); V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(2, 0, 3, 1)); // V11 = D0X,D0W,D2X,D2Y V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 0, 3, 0)); V11 = _mm_shuffle_ps(V11, V11, _MM_SHUFFLE(2, 1, 0, 3)); V02 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(0, 3, 0, 3)); // V12 = D1Z,D1Z,D2Z,D2W V12 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 2, 2, 2)); V12 = _mm_shuffle_ps(V12, V12, _MM_SHUFFLE(0, 2, 3, 0)); V03 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(2, 0, 3, 1)); // V13 = D1X,D1W,D2Z,D2W V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 2, 3, 0)); V13 = _mm_shuffle_ps(V13, V13, _MM_SHUFFLE(2, 1, 0, 3)); V00 = _mm_mul_ps(V00, V10); V01 = _mm_mul_ps(V01, V11); V02 = _mm_mul_ps(V02, V12); V03 = _mm_mul_ps(V03, V13); __m128 C1 = _mm_sub_ps(C0, V00); C0 = _mm_add_ps(C0, V00); __m128 C3 = _mm_add_ps(C2, V01); C2 = _mm_sub_ps(C2, V01); __m128 C5 = _mm_sub_ps(C4, V02); C4 = _mm_add_ps(C4, V02); __m128 C7 = _mm_add_ps(C6, V03); C6 = _mm_sub_ps(C6, V03); C0 = _mm_shuffle_ps(C0, C1, _MM_SHUFFLE(3, 1, 2, 0)); C2 = _mm_shuffle_ps(C2, C3, _MM_SHUFFLE(3, 1, 2, 0)); C4 = _mm_shuffle_ps(C4, C5, _MM_SHUFFLE(3, 1, 2, 0)); C6 = 
_mm_shuffle_ps(C6, C7, _MM_SHUFFLE(3, 1, 2, 0)); C0 = _mm_shuffle_ps(C0, C0, _MM_SHUFFLE(3, 1, 2, 0)); C2 = _mm_shuffle_ps(C2, C2, _MM_SHUFFLE(3, 1, 2, 0)); C4 = _mm_shuffle_ps(C4, C4, _MM_SHUFFLE(3, 1, 2, 0)); C6 = _mm_shuffle_ps(C6, C6, _MM_SHUFFLE(3, 1, 2, 0)); __m128 vTemp = Vector::Dot4(C0, MT.m_rows[0]); vTemp = _mm_div_ps(Vector::One, vTemp); m_rows[0] = _mm_mul_ps(C0, vTemp); m_rows[1] = _mm_mul_ps(C2, vTemp); m_rows[2] = _mm_mul_ps(C4, vTemp); m_rows[3] = _mm_mul_ps(C6, vTemp); return *this; } inline Matrix Matrix::GetInverse() const { Matrix m = *this; m.Invert(); return m; } inline Vector Matrix::GetDeterminant() const { Vector V0 = m_rows[2].Shuffle(1, 0, 0, 0); Vector V1 = m_rows[3].Shuffle(2, 2, 1, 1); Vector V2 = m_rows[2].Shuffle(1, 0, 0, 0); Vector V3 = m_rows[3].Shuffle(3, 3, 3, 2); Vector V4 = m_rows[2].Shuffle(2, 2, 1, 1); Vector V5 = m_rows[3].Shuffle(3, 3, 3, 2); Vector P0 = V0 * V1; Vector P1 = V2 * V3; Vector P2 = V4 * V5; V0 = m_rows[2].Shuffle(2, 2, 1, 1); V1 = m_rows[3].Shuffle(1, 0, 0, 0); V2 = m_rows[2].Shuffle(3, 3, 3, 2); V3 = m_rows[3].Shuffle(1, 0, 0, 0); V4 = m_rows[2].Shuffle(3, 3, 3, 2); V5 = m_rows[3].Shuffle(2, 2, 1, 1); P0 = Vector::NegativeMultiplySubtract(V0, V1, P0); P1 = Vector::NegativeMultiplySubtract(V2, V3, P1); P2 = Vector::NegativeMultiplySubtract(V4, V5, P2); V0 = m_rows[1].Shuffle(3, 3, 3, 2); V1 = m_rows[1].Shuffle(2, 2, 1, 1); V2 = m_rows[1].Shuffle(1, 0, 0, 0); static Vector const Sign(1.0f, -1.0f, 1.0f, -1.0f); Vector S = m_rows[0] * Sign; Vector R = V0 * P0; R = Vector::NegativeMultiplySubtract(V1, P1, R); R = Vector::MultiplyAdd(V2, P2, R); return Vector::Dot4(S, R); } inline float Matrix::GetDeterminantAsFloat() const { return GetDeterminant().GetX(); } inline Vector Matrix::GetTranslation() const { return m_rows[3].GetWithW0(); } inline const Vector& Matrix::GetTranslationWithW() const { return m_rows[3]; } inline Matrix& Matrix::SetTranslation(const Vector& v) { m_rows[3] = v.GetWithW1(); return *this; } inline Matrix& Matrix::SetTranslation(const Float3& v) { m_rows[3] = Vector(v, 1.0f); return *this; } inline Matrix& Matrix::SetTranslation(const Float4& v) { m_rows[3] = Vector(v.m_x, v.m_y, v.m_z, 1.0f); return *this; } inline Quaternion Matrix::GetRotation() const { // based on RTM: https://github.com/nfrechette/rtm const Vector& axisX = m_rows[0]; const Vector& axisY = m_rows[1]; const Vector& axisZ = m_rows[2]; // Zero scale is not supported if (axisX.IsNearZero4() || axisY.IsNearZero4() || axisZ.IsNearZero4()) { HALT(); } float const axisX_X = axisX.GetX(); float const axisY_Y = axisY.GetY(); float const axisZ_Z = axisZ.GetZ(); float const mtx_trace = axisX_X + axisY_Y + axisZ_Z; if (mtx_trace > 0.0) { float const axisX_y = axisX.GetY(); float const axisX_z = axisX.GetZ(); float const axisY_x = axisY.GetX(); float const axisY_z = axisY.GetZ(); float const axisZ_x = axisZ.GetX(); float const axisZ_y = axisZ.GetY(); float const inv_trace = Math::Reciprocal(Math::Sqrt(mtx_trace + 1.0f)); float const half_inv_trace = inv_trace * 0.5f; float const m_x = (axisY_z - axisZ_y) * half_inv_trace; float const m_y = (axisZ_x - axisX_z) * half_inv_trace; float const m_z = (axisX_y - axisY_x) * half_inv_trace; float const m_w = Math::Reciprocal(inv_trace) * 0.5f; return Quaternion(m_x, m_y, m_z, m_w).GetNormalized(); } else { // Find the axis with the highest diagonal value int32_t axisIdx0 = 0; if (axisY_Y > axisX_X) { axisIdx0 = 1; } if (axisZ_Z > m_rows[axisIdx0][axisIdx0]) { axisIdx0 = 2; } int32_t const axisIdx1 = (axisIdx0 + 1) % 3; 
int32_t const axisIdx2 = (axisIdx1 + 1) % 3; float const pseudoTrace = 1.0f + m_rows[axisIdx0][axisIdx0] - m_rows[axisIdx1][axisIdx1] - m_rows[axisIdx2][axisIdx2]; float const inversePseudoTrace = Math::Reciprocal(Math::Sqrt(pseudoTrace)); float const halfInversePseudoTrace = inversePseudoTrace * 0.5f; Float4 rawQuatValues; rawQuatValues[axisIdx0] = Math::Reciprocal(inversePseudoTrace) * 0.5f; rawQuatValues[axisIdx1] = halfInversePseudoTrace * (m_rows[axisIdx0][axisIdx1] + m_rows[axisIdx1][axisIdx0]); rawQuatValues[axisIdx2] = halfInversePseudoTrace * (m_rows[axisIdx0][axisIdx2] + m_rows[axisIdx2][axisIdx0]); rawQuatValues[3] = halfInversePseudoTrace * (m_rows[axisIdx1][axisIdx2] - m_rows[axisIdx2][axisIdx1]); return Quaternion(rawQuatValues).GetNormalized(); } } inline Matrix& Matrix::SetRotation(const Matrix& rotation) { ASSERT(Math::Abs(rotation.GetDeterminant().GetX()) == 1.0f); m_rows[0] = rotation.m_rows[0]; m_rows[1] = rotation.m_rows[1]; m_rows[2] = rotation.m_rows[2]; return *this; } inline Matrix& Matrix::SetRotation(const Quaternion& rotation) { static __m128 const constant1110 = { 1.0f, 1.0f, 1.0f, 0.0f }; __m128 Q0 = _mm_add_ps(rotation, rotation); __m128 Q1 = _mm_mul_ps(rotation, Q0); __m128 V0 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(3, 0, 0, 1)); V0 = _mm_and_ps(V0, SIMD::g_maskXYZ0); __m128 V1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(3, 1, 2, 2)); V1 = _mm_and_ps(V1, SIMD::g_maskXYZ0); __m128 R0 = _mm_sub_ps(constant1110, V0); R0 = _mm_sub_ps(R0, V1); V0 = _mm_shuffle_ps(rotation, rotation, _MM_SHUFFLE(3, 1, 0, 0)); V1 = _mm_shuffle_ps(Q0, Q0, _MM_SHUFFLE(3, 2, 1, 2)); V0 = _mm_mul_ps(V0, V1); V1 = _mm_shuffle_ps(rotation, rotation, _MM_SHUFFLE(3, 3, 3, 3)); __m128 V2 = _mm_shuffle_ps(Q0, Q0, _MM_SHUFFLE(3, 0, 2, 1)); V1 = _mm_mul_ps(V1, V2); __m128 R1 = _mm_add_ps(V0, V1); __m128 R2 = _mm_sub_ps(V0, V1); V0 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(1, 0, 2, 1)); V0 = _mm_shuffle_ps(V0, V0, _MM_SHUFFLE(1, 3, 2, 0)); V1 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(2, 2, 0, 0)); V1 = _mm_shuffle_ps(V1, V1, _MM_SHUFFLE(2, 0, 2, 0)); Q1 = _mm_shuffle_ps(R0, V0, _MM_SHUFFLE(1, 0, 3, 0)); Q1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(1, 3, 2, 0)); m_rows[0] = Q1; Q1 = _mm_shuffle_ps(R0, V0, _MM_SHUFFLE(3, 2, 3, 1)); Q1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(1, 3, 0, 2)); m_rows[1] = Q1; Q1 = _mm_shuffle_ps(V1, R0, _MM_SHUFFLE(3, 2, 1, 0)); m_rows[2] = Q1; return *this; } inline Matrix& Matrix::SetRotationMaintainingScale(const Matrix& rotation) { Vector const scale = GetScale(); SetRotation(rotation); return SetScale(scale); } inline Matrix& Matrix::SetRotationMaintainingScale(const Quaternion& rotation) { Vector const scale = GetScale(); SetRotation(rotation); return SetScale(scale); } inline Matrix& Matrix::SetScale(float uniformScale) { SetScale(Vector(uniformScale)); return *this; } inline Matrix& Matrix::RemoveScaleFast() { m_rows[0] = m_rows[0].GetNormalized4(); m_rows[1] = m_rows[1].GetNormalized4(); m_rows[2] = m_rows[2].GetNormalized4(); return *this; } inline Matrix& Matrix::SetScaleFast(const Vector& scale) { m_rows[0] = m_rows[0].GetNormalized3() * scale.GetSplatX(); m_rows[1] = m_rows[1].GetNormalized3() * scale.GetSplatY(); m_rows[2] = m_rows[2].GetNormalized3() * scale.GetSplatZ(); return *this; } inline Matrix& Matrix::SetScaleFast(float uniformScale) { SetScaleFast(Vector(uniformScale)); return *this; } inline Vector Matrix::RotateVector(const Vector& vector) const { Vector const X = vector.GetSplatX(); Vector const Y = vector.GetSplatY(); Vector const Z = vector.GetSplatZ(); Vector Result 
= Z * m_rows[2]; Result = Vector::MultiplyAdd(Y, m_rows[1], Result); Result = Vector::MultiplyAdd(X, m_rows[0], Result); return Result; } inline Vector Matrix::TransformNormal(const Vector& vector) const { return RotateVector(vector); } inline Vector Matrix::TransformPoint(const Vector& point) const { Vector const X = point.GetSplatX(); Vector const Y = point.GetSplatY(); Vector const Z = point.GetSplatZ(); Vector result = Vector::MultiplyAdd(Z, m_rows[2], m_rows[3]); result = Vector::MultiplyAdd(Y, m_rows[1], result); result = Vector::MultiplyAdd(X, m_rows[0], result); Vector const W = result.GetSplatW(); return result / W; } inline Vector Matrix::TransformVector3(const Vector& V) const { Vector const X = V.GetSplatX(); Vector const Y = V.GetSplatY(); Vector const Z = V.GetSplatZ(); Vector result = Vector::MultiplyAdd(Z, m_rows[2], m_rows[3]); result = Vector::MultiplyAdd(Y, m_rows[1], result); result = Vector::MultiplyAdd(X, m_rows[0], result); return result; } inline Vector Matrix::TransformVector4(const Vector& V) const { // Splat m_x,m_y,m_z and m_w Vector vTempX = V.GetSplatX(); Vector vTempY = V.GetSplatY(); Vector vTempZ = V.GetSplatZ(); Vector vTempW = V.GetSplatW(); // Mul by the matrix vTempX = _mm_mul_ps(vTempX, m_rows[0]); vTempY = _mm_mul_ps(vTempY, m_rows[1]); vTempZ = _mm_mul_ps(vTempZ, m_rows[2]); vTempW = _mm_mul_ps(vTempW, m_rows[3]); // Add them all together vTempX = _mm_add_ps(vTempX, vTempY); vTempZ = _mm_add_ps(vTempZ, vTempW); vTempX = _mm_add_ps(vTempX, vTempZ); return vTempX; } inline Vector& Matrix::operator[](uint32_t i) { ASSERT(i < 4); return m_rows[i]; } inline const Vector Matrix::operator[](uint32_t i) const { ASSERT(i < 4); return m_rows[i]; } inline Matrix Matrix::operator*(const Matrix& rhs) const { Matrix result = *this; result *= rhs; return result; } inline Matrix& Matrix::operator*= (const Matrix& rhs) { Vector vX, vY, vZ, vW; // Use vW to hold the original row vW = m_rows[0]; vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); vW = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); vX = _mm_mul_ps(vX, rhs.m_rows[0]); vY = _mm_mul_ps(vY, rhs.m_rows[1]); vZ = _mm_mul_ps(vZ, rhs.m_rows[2]); vW = _mm_mul_ps(vW, rhs.m_rows[3]); vX = _mm_add_ps(vX, vZ); vY = _mm_add_ps(vY, vW); vX = _mm_add_ps(vX, vY); m_rows[0] = vX; // Repeat for the other 3 rows vW = m_rows[1]; vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); vW = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); vX = _mm_mul_ps(vX, rhs.m_rows[0]); vY = _mm_mul_ps(vY, rhs.m_rows[1]); vZ = _mm_mul_ps(vZ, rhs.m_rows[2]); vW = _mm_mul_ps(vW, rhs.m_rows[3]); vX = _mm_add_ps(vX, vZ); vY = _mm_add_ps(vY, vW); vX = _mm_add_ps(vX, vY); m_rows[1] = vX; vW = m_rows[2]; vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); vW = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); vX = _mm_mul_ps(vX, rhs.m_rows[0]); vY = _mm_mul_ps(vY, rhs.m_rows[1]); vZ = _mm_mul_ps(vZ, rhs.m_rows[2]); vW = _mm_mul_ps(vW, rhs.m_rows[3]); vX = _mm_add_ps(vX, vZ); vY = _mm_add_ps(vY, vW); vX = _mm_add_ps(vX, vY); m_rows[2] = vX; vW = m_rows[3]; vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); vW = 
_mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); vX = _mm_mul_ps(vX, rhs.m_rows[0]); vY = _mm_mul_ps(vY, rhs.m_rows[1]); vZ = _mm_mul_ps(vZ, rhs.m_rows[2]); vW = _mm_mul_ps(vW, rhs.m_rows[3]); vX = _mm_add_ps(vX, vZ); vY = _mm_add_ps(vY, vW); vX = _mm_add_ps(vX, vY); m_rows[3] = vX; return *this; } inline Matrix Matrix::operator*(const Quaternion& rhs) const { return operator*(Matrix(rhs)); } inline Matrix Matrix::operator*=(const Quaternion& rhs) { return operator*=(Matrix(rhs)); } inline bool Matrix::operator==(const Matrix& rhs) const { for (auto i = 0; i < 4; i++) { for (auto j = 0; j < 4; j++) { if (m_values[i][j] != rhs.m_values[i][j]) { return false; } } } return true; } } ================================================ FILE: MotionCorrection/src/cpp/Math/Quaternion.cpp ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #include "Quaternion.h" #include "Matrix.h" namespace Math { Quaternion const Quaternion::Identity(0, 0, 0, 1); // Rotation order is XYZ EulerAngles Quaternion::ToEulerAngles() const { return Matrix(*this).ToEulerAngles(); } Quaternion Quaternion::LookRotation(const Vector& forward, const Vector& up) { const Vector t = Vector::Cross3(up, forward).Normalize3(); return Matrix(t, Vector::Cross3(forward, t), forward).GetRotation(); } } ================================================ FILE: MotionCorrection/src/cpp/Math/Quaternion.h ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Vector.h" namespace Math { class alignas(16) Quaternion { public: static Quaternion const Identity; // Calculate the rotation required to align the source vector to the target vector (shortest path) static Quaternion FromRotationBetweenNormalizedVectors(const Vector& sourceVector, const Vector& targetVector); // Calculate the rotation required to align one vector onto another but also taking account a fallback rotation axis for opposite parallel vectors static Quaternion FromRotationBetweenNormalizedVectors(const Vector& sourceVector, const Vector& targetVector, const Vector& fallbackRotationAxis); // Calculate the rotation required to align the source vector to the target vector (shortest path) static Quaternion FromRotationBetweenVectors(const Vector& sourceVector, const Vector& targetVector); // Normalized LERP - not accurate - only use for really short distances static Quaternion NLerp(const Quaternion& from, const Quaternion& to, float t); // Standard and accurate Spherical LERP - based on DirectX Math static Quaternion SLerp(const Quaternion& from, const Quaternion& to, float t); // Fast approximation of a Spherical LERP - based on "A fast and accurate estimate for SLERP" by David Eberly static Quaternion FastSLerp(const Quaternion& from, const Quaternion& to, float t); // Spherical quadrangle/cubic interpolation for quaternions static Quaternion SQuad(const Quaternion& q0, const Quaternion& q1, const Quaternion& q2, const Quaternion& q3, float t); // Calculate the shortest delta quaternion needed to rotate 'from' onto 'to' static Quaternion Delta(const Quaternion& from, const Quaternion& to); // Simple vector dot product between two quaternions static Vector Dot(const Quaternion& q0, const Quaternion& q1); // Calculate the angular distance between two quaternions static Radians 
Distance(const Quaternion& q0, const Quaternion& q1);

    // Calculate look rotation given forward and up vectors
    static Quaternion LookRotation(const Vector& forward, const Vector& up);

public:
    Quaternion() = default;
    explicit Quaternion(NoInit_t);
    explicit Quaternion(IdentityInit_t);
    explicit Quaternion(const Vector& v);
    explicit Quaternion(float ix, float iy, float iz, float iw);
    explicit Quaternion(const Float4& v);
    explicit Quaternion(const Vector& axis, Radians angle);
    explicit Quaternion(AxisAngle axisAngle);
    explicit Quaternion(const EulerAngles& eulerAngles);
    explicit Quaternion(Radians rotX, Radians rotY, Radians rotZ);

    operator __m128& ();
    operator const __m128& () const;

    Float4 ToFloat4() const;
    Vector ToVector() const;
    Vector Length();
    float GetLength() const;

    // Get the angle this rotation represents around the specified axis
    Radians GetAngle() const;
    AxisAngle ToAxisAngle() const;
    EulerAngles ToEulerAngles() const;

    Vector RotateVector(const Vector& vector) const;
    Vector RotateVectorInverse(const Vector& vector) const;

    Quaternion& Conjugate();
    Quaternion GetConjugate() const;
    Quaternion& Negate();
    Quaternion GetNegated() const;
    Quaternion& Invert();
    Quaternion GetInverse() const;
    Quaternion& Normalize();
    Quaternion GetNormalized() const;

    Vector XAxis() const noexcept;
    Vector YAxis() const noexcept;
    Vector ZAxis() const noexcept;

    // Ensure that this rotation is the shortest in terms of the angle (i.e. -5 instead of 355)
    Quaternion& MakeShortestPath();
    // Ensure that this rotation is the shortest in terms of the angle (i.e. -5 instead of 355)
    Quaternion GetShortestPath() const;

    // This function will return the estimated normalized quaternion, this is not super accurate but a lot faster (use with care)
    Quaternion& NormalizeInaccurate();
    // This function will return the estimated normalized quaternion, this is not super accurate but a lot faster (use with care)
    Quaternion GetNormalizedInaccurate() const;

    bool IsNormalized() const;
    bool IsIdentity() const;

    // Concatenate the rotation of this onto rhs and return the result i.e. first rotate by rhs then by this
    // This means order of rotation is right-to-left: child-rotation * parent-rotation
    Quaternion operator*(const Quaternion& rhs) const;
    Quaternion& operator*=(const Quaternion& rhs);

    // Is the distance between this quaternion and another one under the threshold?
    bool IsNearEqual(const Quaternion& rhs, Radians const threshold = Math::DegreesToRadians) const;

    // Exact equality
    bool operator==(const Quaternion& rhs) const;
    // Exact inequality
    bool operator!=(const Quaternion& rhs) const;

private:
    Vector GetSplatW() const;
    float GetW() const;

    Quaternion& operator=(const Vector& v) = delete;

public:
    __m128 m_data;
};

static_assert(sizeof(Quaternion) == 16, "Quaternion size must be 16 bytes!");
}

#include "Quaternion.inl"

================================================
FILE: MotionCorrection/src/cpp/Math/Quaternion.inl
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Quaternion.h" namespace Math { inline Quaternion Quaternion::FromRotationBetweenNormalizedVectors(const Vector& from, const Vector& to) { ASSERT(from.IsNormalized3() && to.IsNormalized3()); Quaternion result; // Parallel vectors - return zero rotation Vector const dot = Vector::Dot3(from, to); if (dot.IsGreaterThanEqual4(Vector::OneMinusEpsilon)) { result = Quaternion::Identity; } // Opposite vectors - return 180 rotation around any orthogonal axis else if (dot.IsLessThanEqual4(Vector::EpsilonMinusOne)) { Float4 const fromValues = from.ToFloat4(); result = Quaternion(-fromValues.m_z, fromValues.m_y, fromValues.m_x, 0); result.Normalize(); } else // Calculate quaternion rotation { Vector const cross = Vector::Cross3(from, to); Vector Q = Vector::Select(cross, dot, Vector::Select0001); Q += Vector::Select(Vector::Zero, Q.Length4(), Vector::Select0001); result = Quaternion(Q); result.Normalize(); } return result; } inline Quaternion Quaternion::FromRotationBetweenNormalizedVectors(const Vector& from, const Vector& to, const Vector& fallbackRotationAxis) { ASSERT(from.IsNormalized3() && to.IsNormalized3()); Quaternion Q(NoInit); Vector rotationAxis = from.Cross3(to).GetNormalized3(); if (rotationAxis.GetLengthSquared3() == 0) { rotationAxis = fallbackRotationAxis; } float const dot = from.GetDot3(to); if (dot >= (1.0f - Math::Epsilon)) { Q = Quaternion::Identity; } else { float const angle = Math::ACos(dot); Q = Quaternion(rotationAxis, angle); } return Q; } inline Quaternion Quaternion::FromRotationBetweenVectors(const Vector& sourceVector, const Vector& targetVector) { return FromRotationBetweenNormalizedVectors( sourceVector.GetNormalized3(), targetVector.GetNormalized3()); } inline Quaternion Quaternion::NLerp(const Quaternion& from, const Quaternion& to, float T) { ASSERT(T >= 0.0f && T <= 1.0f); Quaternion adjustedFrom(from); // Ensure that the rotations are in the same direction if (Quaternion::Dot(from, to).IsLessThan4(Vector::Zero)) { adjustedFrom.Negate(); } Quaternion result(Vector::Lerp(adjustedFrom.ToVector(), to.ToVector(), T)); result.Normalize(); return result; } inline Quaternion Quaternion::SLerp(const Quaternion& from, const Quaternion& to, float T) { ASSERT(T >= 0.0f && T <= 1.0f); static SIMD::UIntMask const maskSign = { 0x80000000,0x00000000,0x00000000,0x00000000 }; static __m128 const oneMinusEpsilon = { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f }; Vector const VecT(T); Vector cosOmega = Quaternion::Dot(from, to); Vector control = cosOmega.LessThan(Vector::Zero); Vector sign = Vector::Select(Vector::One, Vector::NegativeOne, control); cosOmega = _mm_mul_ps(cosOmega, sign); control = cosOmega.LessThan(oneMinusEpsilon); Vector sinOmega = _mm_mul_ps(cosOmega, cosOmega); sinOmega = _mm_sub_ps(Vector::One, sinOmega); sinOmega = _mm_sqrt_ps(sinOmega); Vector omega = Vector::ATan2(sinOmega, cosOmega); Vector V01 = _mm_shuffle_ps(VecT, VecT, _MM_SHUFFLE(2, 3, 0, 1)); V01 = _mm_and_ps(V01, SIMD::g_maskXY00); V01 = _mm_xor_ps(V01, maskSign); V01 = _mm_add_ps(Vector::UnitX, V01); Vector S0 = _mm_mul_ps(V01, omega); S0 = Vector::Sin(S0); S0 = _mm_div_ps(S0, sinOmega); S0 = Vector::Select(V01, S0, control); Vector S1 = S0.GetSplatY(); S0 = S0.GetSplatX(); S1 = _mm_mul_ps(S1, sign); Vector result = _mm_mul_ps(from, S0); S1 = _mm_mul_ps(S1, to); result = _mm_add_ps(result, S1); return Quaternion(result); } inline Quaternion Quaternion::FastSLerp(const Quaternion& q0, const Quaternion& q1, float t) 
{ // Precomputed constants constexpr float const mu = 1.85298109240830f; static Vector const u0123 = _mm_setr_ps(1.f / (1 * 3), 1.f / (2 * 5), 1.f / (3 * 7), 1.f / (4 * 9)); static Vector const u4567 = _mm_setr_ps(1.f / (5 * 11), 1.f / (6 * 13), 1.f / (7 * 15), mu / (8 * 17)); static Vector const v0123 = _mm_setr_ps(1.f / 3, 2.f / 5, 3.f / 7, 4.f / 9); static Vector const v4567 = _mm_setr_ps(5.f / 11, 6.f / 13, 7.f / 15, mu * 8 / 17); static Vector const vSignMask = _mm_set1_ps(-0.f); // Common code for computing the scalar coefficients of SLERP auto CalculateCoefficient = [](Vector vT, Vector xm1) { Vector const vTSquared = vT * vT; // ( b4, b5, b6, b7 ) = ( x-1 ) * ( u4 * t^2 - v4, u5 * t^2 - v5, u6 * t^2 - v6, u7 * t^2 - v7 ) Vector b4567 = Vector::MultiplySubtract(u4567, vTSquared, v4567); b4567 *= xm1; // ( b7, b7, b7, b7 ) Vector b = b4567.GetSplatW(); Vector c = b + Vector::One; // ( b6, b6, b6, b6 ) b = b4567.GetSplatZ(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b5, b5, b5, b5 ) b = b4567.GetSplatY(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b4, b4, b4, b4 ) b = b4567.GetSplatX(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b0, b1, b2, b3 ) = // ( x-1)*(u0* t^2-v0, u1 * t^2 -v1, u2* t^2-v2, u3* t^2-v3 ) Vector b0123 = Vector::MultiplySubtract(u0123, vTSquared, v0123); b0123 *= xm1; // ( b3, b3, b3, b3 ) b = b0123.GetSplatW(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b2, b2, b2, b2 ) b = b0123.GetSplatZ(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b1, b1, b1, b1 ) b = b0123.GetSplatY(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b0, b0, b0, b0 ) b = b0123.GetSplatX(); c = Vector::MultiplyAdd(b, c, Vector::One); c *= vT; return c; }; Vector x = Vector::Dot4(q0.m_data, q1.m_data); // cos ( theta ) in all components Vector sign = _mm_and_ps(vSignMask, x); x = _mm_xor_ps(sign, x); Vector localQ1 = _mm_xor_ps(sign, q1); Vector xm1 = x - Vector::One; Vector cT = CalculateCoefficient(Vector(t), xm1); Vector cD = CalculateCoefficient(Vector(1.0f - t), xm1); cT = cT * localQ1; Quaternion result(Vector::MultiplyAdd(cD, q0.m_data, cT)); return result; } inline Quaternion Quaternion::SQuad(const Quaternion& q0, const Quaternion& q1, const Quaternion& q2, const Quaternion& q3, float t) { ASSERT(t >= 0.0f && t <= 1.0f); Quaternion const q03 = Quaternion::SLerp(q0, q3, t); Quaternion const q12 = Quaternion::SLerp(q1, q2, t); t = (t - (t * t)) * 2; Quaternion const result = Quaternion::SLerp(q03, q12, t); return result; } inline Quaternion Quaternion::Delta(const Quaternion& from, const Quaternion& to) { return to * from.GetInverse(); } inline Vector Quaternion::Dot(const Quaternion& q0, const Quaternion& q1) { return Vector::Dot4(q0.m_data, q1.m_data); } inline Radians Quaternion::Distance(const Quaternion& q0, const Quaternion& q1) { float const dot = Math::Clamp(Dot(q0, q1).ToFloat(), -1.0f, 1.0f); return Radians(2 * Math::ACos(Math::Abs(dot))); } inline Quaternion::Quaternion(NoInit_t) { } inline Quaternion::Quaternion(IdentityInit_t) : m_data(Vector::UnitW.m_data) { } inline Quaternion::Quaternion(const Vector& v) : m_data(v.m_data) { } inline Quaternion::Quaternion(float ix, float iy, float iz, float iw) { m_data = _mm_set_ps(iw, iz, iy, ix); } inline Quaternion::Quaternion(const Float4& v) : Quaternion(v.m_x, v.m_y, v.m_z, v.m_w) { } inline Quaternion::Quaternion(const Vector& axis, Radians angle) { ASSERT(axis.IsNormalized3()); auto N = _mm_and_ps(axis, SIMD::g_maskXYZ0); N = _mm_or_ps(N, Vector::UnitW); auto scale = _mm_set_ps1(0.5f * 
(float)angle); Vector sine, cosine; Vector::SinCos(sine, cosine, scale); scale = _mm_and_ps(sine, SIMD::g_maskXYZ0); cosine = _mm_and_ps(cosine, SIMD::g_mask000W); scale = _mm_or_ps(scale, cosine); N = _mm_mul_ps(N, scale); m_data = N; } inline Quaternion::Quaternion(AxisAngle axisAngle) : Quaternion(Vector(axisAngle.m_axis), axisAngle.m_angle) { } inline Quaternion::Quaternion(const EulerAngles& eulerAngles) { auto const rotationX = Quaternion(Vector::UnitX, eulerAngles.m_x); auto const rotationY = Quaternion(Vector::UnitY, eulerAngles.m_y); auto const rotationZ = Quaternion(Vector::UnitZ, eulerAngles.m_z); // Rotation order is XYZ - all in global space, hence the order is reversed m_data = (rotationX * rotationY * rotationZ).GetNormalized().m_data; } inline Quaternion::Quaternion(Radians rotX, Radians rotY, Radians rotZ) : Quaternion(EulerAngles(rotX, rotY, rotZ)) { } inline Quaternion::operator __m128& () { return m_data; } inline Quaternion::operator const __m128& () const { return m_data; } inline Float4 Quaternion::ToFloat4() const { Float4 v; _mm_storeu_ps(&v.m_x, m_data); return v; } inline Vector Quaternion::ToVector() const { return Vector(m_data); } inline Vector Quaternion::Length() { return ToVector().Length4(); } inline float Quaternion::GetLength() const { return ToVector().GetLength4(); } inline Radians Quaternion::GetAngle() const { return Radians(2.0f * Math::ACos(GetW())); } inline AxisAngle Quaternion::ToAxisAngle() const { return AxisAngle(ToVector(), Radians(2.0f * Math::ACos(GetW()))); } inline Vector Quaternion::RotateVector(const Vector& vector) const { Quaternion const A(Vector::Select(Vector::Select1110, vector, Vector::Select1110)); Quaternion const result = GetConjugate() * A; return (result * *this).ToVector(); } inline Vector Quaternion::RotateVectorInverse(const Vector& vector) const { Quaternion const A(Vector::Select(Vector::Select1110, vector, Vector::Select1110)); Quaternion const result = *this * A; return (result * GetConjugate()).ToVector(); } inline Quaternion& Quaternion::Conjugate() { static __m128 const conj = { -1.0f, -1.0f, -1.0f, 1.0f }; m_data = _mm_mul_ps(*this, conj); return *this; } inline Quaternion Quaternion::GetConjugate() const { Quaternion q = *this; q.Conjugate(); return q; } inline Quaternion& Quaternion::Negate() { m_data = _mm_mul_ps(*this, Vector::NegativeOne); return *this; } inline Quaternion Quaternion::GetNegated() const { Quaternion q = *this; q.Negate(); return q; } inline Quaternion& Quaternion::Invert() { Vector const conjugate(GetConjugate().m_data); Vector const length = ToVector().Length4(); Vector const mask = length.LessThanEqual(Vector::Epsilon); Vector const result = conjugate / length; m_data = result.Select(result, Vector::Zero, mask); return *this; } inline Quaternion Quaternion::GetInverse() const { Quaternion q = *this; q.Invert(); return q; } inline Quaternion& Quaternion::Normalize() { m_data = ToVector().GetNormalized4().m_data; return *this; } inline Quaternion Quaternion::GetNormalized() const { Quaternion q = *this; q.Normalize(); return q; } inline Vector Quaternion::XAxis() const noexcept { const float x = _mm_cvtss_f32(m_data); const float y = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1))); const float z = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2))); const float w = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3))); const float s = 2.0f * w; const float x2 = 2.0f * x; return Vector( x2 * x + s * w - 1.0f, x2 * y + s * z, x2 * z 
+ s * -y); } inline Vector Quaternion::YAxis() const noexcept { const float x = _mm_cvtss_f32(m_data); const float y = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1))); const float z = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2))); const float w = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3))); const float s = 2.0f * w; const float y2 = 2.0f * y; return Vector( y2 * x + s * -z, y2 * y + s * w - 1.0f, y2 * z + s * x); } inline Vector Quaternion::ZAxis() const noexcept { const float x = _mm_cvtss_f32(m_data); const float y = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1))); const float z = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2))); const float w = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3))); const float s = 2.0f * w; const float z2 = 2.0f * z; return Vector( x * z2 + s * y, y * z2 + s * -x, z * z2 + s * w - 1.0f); } inline Quaternion& Quaternion::MakeShortestPath() { // If we have a > 180 angle, negate // w < 0.0f is the same as dot( identity, q ) < 0 if (GetW() < 0.0f) { Negate(); } return *this; } inline Quaternion Quaternion::GetShortestPath() const { Quaternion sp = *this; sp.MakeShortestPath(); return sp; } inline Quaternion& Quaternion::NormalizeInaccurate() { *this = GetNormalizedInaccurate(); return *this; } inline Quaternion Quaternion::GetNormalizedInaccurate() const { __m128 vLengthSq = _mm_mul_ps(m_data, m_data); __m128 vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); vLengthSq = _mm_add_ps(vLengthSq, vTemp); vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); vLengthSq = _mm_add_ps(vLengthSq, vTemp); vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); // Get the reciprocal and mul to perform the normalization Quaternion result; result.m_data = _mm_rsqrt_ps(vLengthSq); result.m_data = _mm_mul_ps(result.m_data, m_data); return result; } inline bool Quaternion::IsNormalized() const { return ToVector().IsNormalized4(); } inline bool Quaternion::IsIdentity() const { return ToVector().IsEqual3(Vector::UnitW); } inline Quaternion Quaternion::operator*(const Quaternion& rhs) const { static const __m128 controlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f }; static const __m128 controlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f }; static const __m128 controlYXWZ = { -1.0f, 1.0f, 1.0f,-1.0f }; // Copy to SSE registers and use as few as possible for x86 __m128 Q2X = rhs; __m128 Q2Y = rhs; __m128 Q2Z = rhs; __m128 vResult = rhs; // Splat with one instruction vResult = _mm_shuffle_ps(vResult, vResult, _MM_SHUFFLE(3, 3, 3, 3)); Q2X = _mm_shuffle_ps(Q2X, Q2X, _MM_SHUFFLE(0, 0, 0, 0)); Q2Y = _mm_shuffle_ps(Q2Y, Q2Y, _MM_SHUFFLE(1, 1, 1, 1)); Q2Z = _mm_shuffle_ps(Q2Z, Q2Z, _MM_SHUFFLE(2, 2, 2, 2)); // Retire Q1 and perform Q1*Q2W vResult = _mm_mul_ps(vResult, *this); __m128 Q1Shuffle = *this; // Shuffle the copies of Q1 Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3)); // Mul by Q1WZYX Q2X = _mm_mul_ps(Q2X, Q1Shuffle); Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(2, 3, 0, 1)); // Flip the signs on m_y and m_z Q2X = _mm_mul_ps(Q2X, controlWZYX); // Mul by Q1ZWXY Q2Y = _mm_mul_ps(Q2Y, Q1Shuffle); Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3)); // Flip the signs on m_z and m_w Q2Y = _mm_mul_ps(Q2Y, controlZWXY); // Mul by Q1YXWZ Q2Z = _mm_mul_ps(Q2Z, Q1Shuffle); vResult = 
_mm_add_ps(vResult, Q2X);
    // Flip the signs on m_x and m_w
    Q2Z = _mm_mul_ps(Q2Z, controlYXWZ);
    Q2Y = _mm_add_ps(Q2Y, Q2Z);
    vResult = _mm_add_ps(vResult, Q2Y);
    return Quaternion(vResult);
}

inline Quaternion& Quaternion::operator*=(const Quaternion& rhs)
{
    *this = *this * rhs;
    return *this;
}

inline bool Quaternion::IsNearEqual(const Quaternion& rhs, Radians const threshold) const
{
    return Quaternion::Distance(*this, rhs) <= threshold;
}

inline bool Quaternion::operator==(const Quaternion& rhs) const { return ToVector() == rhs.ToVector(); }

inline bool Quaternion::operator!=(const Quaternion& rhs) const { return !operator==(rhs); }

inline Vector Quaternion::GetSplatW() const { return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3)); }

inline float Quaternion::GetW() const
{
    auto vTemp = GetSplatW();
    return _mm_cvtss_f32(vTemp);
}
}

================================================
FILE: MotionCorrection/src/cpp/Math/SIMD.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

#pragma once

#include <emmintrin.h> // SSE2 intrinsics
#include <cstdint>     // fixed-width integer types

namespace SIMD
{
    struct alignas( 16 ) IntMask
    {
        inline operator __m128( ) const { return reinterpret_cast<__m128 const&>( *this ); }
        inline operator __m128i( ) const { return _mm_castps_si128( *this ); }
        inline operator __m128d( ) const { return _mm_castps_pd( *this ); }

        int32_t i[4];
    };

    struct alignas( 16 ) UIntMask
    {
        inline operator __m128( ) const { return reinterpret_cast<__m128 const&>( *this ); }
        inline operator __m128i( ) const { return _mm_castps_si128( *this ); }
        inline operator __m128d( ) const { return _mm_castps_pd( *this ); }

        uint32_t v[4];
    };

    struct alignas( 16 ) FloatMask
    {
        inline operator __m128() const { return reinterpret_cast<__m128 const&>( *this ); }
        inline operator __m128i() const { return _mm_castps_si128( *this ); }
        inline operator __m128d() const { return _mm_castps_pd( *this ); }

        float v[4];
    };

    // Int Operations
    //-------------------------------------------------------------------------

    namespace Int
    {
        FORCE_INLINE bool Equal( __m128 V1, __m128 V2 )
        {
            __m128i vTemp = _mm_cmpeq_epi32( _mm_castps_si128( V1 ), _mm_castps_si128( V2 ) );
            return ( ( ( _mm_movemask_ps( _mm_castsi128_ps( vTemp ) ) & 7 ) == 7 ) != 0 );
        }

        FORCE_INLINE bool NotEqual( __m128 V1, __m128 V2 )
        {
            __m128i vTemp = _mm_cmpeq_epi32( _mm_castps_si128( V1 ), _mm_castps_si128( V2 ) );
            return ( ( _mm_movemask_ps( _mm_castsi128_ps( vTemp ) ) != 0xF ) != 0 );
        }

        FORCE_INLINE __m128 And( __m128 V1, __m128 V2 ) { return _mm_and_ps( V1, V2 ); }

        FORCE_INLINE __m128 Or( __m128 V1, __m128 V2 )
        {
            __m128i V = _mm_or_si128( _mm_castps_si128( V1 ), _mm_castps_si128( V2 ) );
            return _mm_castsi128_ps( V );
        }
    }

    //-------------------------------------------------------------------------

    static __m128 const g_sinCoefficients0 = { -0.16666667f, +0.0083333310f, -0.00019840874f, +2.7525562e-06f };
    static __m128 const g_sinCoefficients1 = { -2.3889859e-08f, -0.16665852f, +0.0083139502f, -0.00018524670f };
    static __m128 const g_cosCoefficients0 = { -0.5f, +0.041666638f, -0.0013888378f, +2.4760495e-05f };
    static __m128 const g_cosCoefficients1 = { -2.6051615e-07f, -0.49992746f, +0.041493919f, -0.0012712436f };
    static __m128 const g_tanCoefficients0 = { 1.0f, 0.333333333f, 0.133333333f, 5.396825397e-2f };
    static __m128 const g_tanCoefficients1 = { 2.186948854e-2f, 8.863235530e-3f, 3.592128167e-3f, 1.455834485e-3f };
    static __m128 const g_tanCoefficients2 = { 5.900274264e-4f,
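    // The tables above are minimax polynomial coefficients, stored four per
    // register in ascending odd/even powers. A scalar sketch of how a sin table
    // like this is typically consumed (a Horner evaluation in the style of
    // DirectXMath's XMScalarSin; assumes the input was already range-reduced to
    // [-Pi, Pi] -- illustrative only, not a function defined by this file):
    //
    //     inline float SinApproxScalar( float x )
    //     {
    //         float const x2 = x * x;
    //         float p = -2.3889859e-08f;       // g_sinCoefficients1[0]  (x^11)
    //         p = p * x2 + 2.7525562e-06f;     // g_sinCoefficients0[3]  (x^9)
    //         p = p * x2 - 0.00019840874f;     // g_sinCoefficients0[2]  (x^7)
    //         p = p * x2 + 0.0083333310f;      // g_sinCoefficients0[1]  (x^5)
    //         p = p * x2 - 0.16666667f;        // g_sinCoefficients0[0]  (x^3)
    //         return x * ( p * x2 + 1.0f );    // sin(x) ~ x + c3*x^3 + ... + c11*x^11
    //     }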
2.391290764e-4f, 9.691537707e-5f, 3.927832950e-5f }; static __m128 const g_arcCoefficients0 = { +1.5707963050f, -0.2145988016f, +0.0889789874f, -0.0501743046f }; static __m128 const g_arcCoefficients1 = { +0.0308918810f, -0.0170881256f, +0.0066700901f, -0.0012624911f }; static __m128 const g_aTanCoefficients0 = { -0.3333314528f, +0.1999355085f, -0.1420889944f, +0.1065626393f }; static __m128 const g_aTanCoefficients1 = { -0.0752896400f, +0.0429096138f, -0.0161657367f, +0.0028662257f }; static __m128 const g_aTanEstCoefficients0 = { +0.999866f, +0.999866f, +0.999866f, +0.999866f }; static __m128 const g_aTanEstCoefficients1 = { -0.3302995f, +0.180141f, -0.085133f, +0.0208351f }; static __m128 const g_tanEstCoefficients = { 2.484f, -1.954923183e-1f, 2.467401101f, Math::OneDivPi }; static __m128 const g_arcEstCoefficients = { +1.5707288f,-0.2121144f,+0.0742610f,-0.0187293f }; static __m128 const g_aTan2Constants = { Math::Pi, Math::PiDivTwo, Math::PiDivFour, 2.3561944905f /* 3/4 Pi */ }; //------------------------------------------------------------------------- static FloatMask const g_noFraction = { 8388608.0f,8388608.0f,8388608.0f,8388608.0f }; static IntMask const g_absMask = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; static UIntMask const g_trueMask = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; static UIntMask const g_signMask = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; static UIntMask const g_maskX000 = { 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 }; static UIntMask const g_mask0Y00 = { 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 }; static UIntMask const g_mask00Z0 = { 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 }; static UIntMask const g_mask000W = { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF }; static UIntMask const g_maskXY00 = { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 }; static UIntMask const g_maskXYZ0 = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 }; } ================================================ FILE: MotionCorrection/src/cpp/Math/Scalar.h ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 * SPDX-License-Identifier: Apache-2.0
 */

#pragma once

#include "Compiler.h"
#include "Debug.h"
#include "Constants.h"

#include <cmath>   // sinf, cosf, fabsf, powf, ...
#include <cstdlib> // abs, labs, llabs
#include <cstdint> // fixed-width integer types

//
// Scalar related methods
//

namespace Math
{
    FORCE_INLINE float Sin( float value ) { return sinf( value ); }
    FORCE_INLINE float Cos( float value ) { return cosf( value ); }
    FORCE_INLINE float Tan( float value ) { return tanf( value ); }
    FORCE_INLINE float ASin( float value ) { return asinf( value ); }
    FORCE_INLINE float ACos( float value ) { return acosf( value ); }
    FORCE_INLINE float ATan( float value ) { return atanf( value ); }
    FORCE_INLINE float ATan2( float y, float x ) { return atan2f( y, x ); }
    FORCE_INLINE float Cosec( float value ) { return 1.0f / sinf( value ); }
    FORCE_INLINE float Sec( float value ) { return 1.0f / cosf( value ); }
    FORCE_INLINE float Cot( float value ) { return 1.0f / tanf( value ); }
    FORCE_INLINE float Pow( float x, float y ) { return powf( x, y ); }
    FORCE_INLINE float Sqr( float value ) { return value * value; }
    FORCE_INLINE float Sqrt( float value ) { return sqrtf( value ); }
    FORCE_INLINE float Log( float value ) { return logf( value ); }
    FORCE_INLINE float Log2f( float value ) { return log2f( value ); }

    FORCE_INLINE float AddToMovingAverage( float currentAverage, uint64_t numCurrentSamples, float newValue )
    {
        return currentAverage + ( ( newValue - currentAverage ) / float( numCurrentSamples + 1 ) );
    }

    FORCE_INLINE float Abs( float a ) { return fabsf( a ); }
    FORCE_INLINE double Abs( double a ) { return fabs( a ); }
    FORCE_INLINE int8_t Abs( int8_t a ) { return (int8_t) abs( a ); }
    FORCE_INLINE int16_t Abs( int16_t a ) { return (int16_t) abs( a ); }
    FORCE_INLINE int32_t Abs( int32_t a ) { return labs( a ); }
    FORCE_INLINE int64_t Abs( int64_t a ) { return llabs( a ); }

    FORCE_INLINE float Reciprocal( float r ) { return 1.0f / r; }
    FORCE_INLINE double Reciprocal( double r ) { return 1.0 / r; }

    template <typename T> FORCE_INLINE T Min( T a, T b ) { return a <= b ? a : b; }
    template <typename T> FORCE_INLINE T Max( T a, T b ) { return a >= b ? a : b; }
    template <typename T> FORCE_INLINE T AbsMin( T a, T b ) { return Abs( a ) <= Abs( b ) ? a : b; }
    template <typename T> FORCE_INLINE T AbsMax( T a, T b ) { return Abs( a ) >= Abs( b ) ? a : b; }
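    // AddToMovingAverage() above is the standard incremental-mean update,
    // newAverage = average + ( sample - average ) / ( n + 1 ), which avoids
    // keeping a running sum. A small worked example (values purely illustrative):
    //
    //     float avg = 1.0f;                                  // first sample, n = 1
    //     avg = Math::AddToMovingAverage( avg, 1, 2.0f );    // -> 1.5f
    //     avg = Math::AddToMovingAverage( avg, 2, 3.0f );    // -> 2.0f == mean( 1, 2, 3 )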
    template <typename T> FORCE_INLINE T Sqrt( T a ) { return sqrt( a ); }

    template <typename T> FORCE_INLINE T Clamp( T value, T lowerBound, T upperBound )
    {
        ASSERT( lowerBound <= upperBound );
        return Min( Max( value, lowerBound ), upperBound );
    }

    template <typename T> FORCE_INLINE bool IsInRangeInclusive( T value, T lowerBound, T upperBound )
    {
        ASSERT( lowerBound < upperBound );
        return value >= lowerBound && value <= upperBound;
    }

    template <typename T> FORCE_INLINE bool IsInRangeExclusive( T value, T lowerBound, T upperBound )
    {
        ASSERT( lowerBound < upperBound );
        return value > lowerBound && value < upperBound;
    }

    // Decomposes a float into integer and fractional portions: the fractional part
    // is returned and the integer part is stored in 'integerPortion'
    FORCE_INLINE float ModF( float value, float& integerPortion ) { return modff( value, &integerPortion ); }

    // Returns the floating point remainder of x/y
    FORCE_INLINE float FModF( float x, float y ) { return fmodf( x, y ); }

    template <typename T> FORCE_INLINE T Lerp( T A, T B, float t ) { return A + ( B - A ) * t; }

    FORCE_INLINE float PercentageThroughRange( float value, float lowerBound, float upperBound )
    {
        ASSERT( lowerBound < upperBound );
        return ( Clamp( value, lowerBound, upperBound ) - lowerBound ) / ( upperBound - lowerBound );
    }

    FORCE_INLINE bool IsNearEqual( float value, float comparand, float epsilon = Epsilon ) { return fabsf( value - comparand ) <= epsilon; }
    FORCE_INLINE bool IsNearZero( float value, float epsilon = Epsilon ) { return fabsf( value ) <= epsilon; }
    FORCE_INLINE bool IsNearEqual( double value, double comparand, double epsilon = Epsilon ) { return fabs( value - comparand ) <= epsilon; }
    FORCE_INLINE bool IsNearZero( double value, double epsilon = Epsilon ) { return fabs( value ) <= epsilon; }

    FORCE_INLINE float Ceiling( float value ) { return ceilf( value ); }
    FORCE_INLINE int32_t CeilingToInt( float value ) { return (int32_t) ceilf( value ); }
    FORCE_INLINE float Floor( float value ) { return floorf( value ); }
    FORCE_INLINE int32_t FloorToInt( float value ) { return (int32_t) floorf( value ); }
    FORCE_INLINE float Round( float value ) { return roundf( value ); }
    FORCE_INLINE int32_t RoundToInt( float value ) { return (int32_t) roundf( value ); }

    inline float RemapRange( float value, float fromRangeBegin, float fromRangeEnd, float toRangeBegin, float toRangeEnd )
    {
        float const fromRangeLength = fromRangeEnd - fromRangeBegin;
        float const percentageThroughFromRange = Clamp( ( value - fromRangeBegin ) / fromRangeLength, 0.0f, 1.0f );
        float const toRangeLength = toRangeEnd - toRangeBegin;
        float const result = toRangeBegin + ( percentageThroughFromRange * toRangeLength );
        return result;
    }

    FORCE_INLINE float Square( float value ) { return value * value; }

    FORCE_INLINE float SmoothStep01( float value )
    {
        value = Clamp( value, 0.0f, 1.0f );
        return value * value * ( 3.0f - 2.0f * value );
    }
}

================================================
FILE: MotionCorrection/src/cpp/Math/Transform.cpp
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 */ #include "Transform.h" namespace Math { Transform const Transform::Identity = Transform(Quaternion(0, 0, 0, 1), Vector(0, 0, 0, 1), 1.0f); void Transform::SanitizeScaleValue() { if (Math::IsNearEqual(GetScale(), 1.0f, Math::LargeEpsilon)) { SetScale(1.0f); } } } ================================================ FILE: MotionCorrection/src/cpp/Math/Transform.h ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Matrix.h" namespace Math { // // VQS Transform // class Transform { public: static Transform const Identity; static Transform FromRotation(const Quaternion& rotation); static Transform FromTranslation(const Vector& translation); static Transform FromScale(float uniformScale); static Transform FromTranslationAndScale(const Vector& translation, float uniformScale); static Transform FromRotationBetweenVectors(const Vector sourceVector, const Vector targetVector); // Linearly interpolate between two transforms - uses NLerp for rotations static Transform Lerp(const Transform& from, const Transform& to, float t); // Spherically interpolate between two transforms - uses SLerp for rotations static Transform Slerp(const Transform& from, const Transform& to, float t); // Spherically interpolate between two transforms - uses FastSLerp (SLerp approximation) for rotations static Transform FastSlerp(const Transform& from, const Transform& to, float t); // Calculate a delta transform that you can concatenate to the // 'from' transform to get the 'to' transform. Properly handles the non-uniform scaling case. static Transform Delta(const Transform& from, const Transform& to); // Calculates a delta transform that you can concatenate to the // 'from' transform to get the 'to' transform (ignoring scale) static Transform DeltaNoScale(const Transform& from, const Transform& to); static void DirectlySetRotation(Transform& transform, Quaternion&& rotation); static void DirectlySetRotation(Transform& transform, const Quaternion& rotation); static void DirectlySetTranslationScale(Transform& transform, Vector&& translationScale); static void DirectlySetTranslationScale(Transform& transform, const Vector& translationScale); public: Transform() = default; explicit Transform(NoInit_t); explicit Transform(const Matrix& m); explicit Transform( const Quaternion& rotation, const Vector& translation = Vector(0, 0, 0, 0), float scale = 1.0f); explicit Transform(const AxisAngle& rotation); Matrix ToMatrix() const; Matrix ToMatrixNoScale() const; EulerAngles ToEulerAngles() const; Vector GetAxisX() const; Vector GetAxisY() const; Vector GetAxisZ() const; Vector GetRightVector() const; Vector GetForwardVector() const; Vector GetUpVector() const; bool IsIdentity() const; bool IsRigidTransform() const; void MakeRigidTransform(); // // Inverse and Deltas // // Invert this transform. // If you want a delta transform that you can // concatenate, then you should use the 'Delta' functions Transform& Inverse(); // Get the inverse of this transform. 
    // If you want a delta transform that you can
    // concatenate, then you should use the 'Delta' functions
    Transform GetInverse() const;

    // Return the delta required to reach a given target
    // transform (i.e., what do we need to add to reach that transform)
    Transform GetDeltaToOther(const Transform& targetTransform) const;

    // Return the delta relative to a given start
    // transform (i.e., how much do we differ from it)
    Transform GetDeltaFromOther(const Transform& startTransform) const;

    //
    // Rotation
    //

    const Quaternion& GetRotation() const;
    void SetRotation(const Quaternion& rotation);
    void AddRotation(const Quaternion& delta);

    //
    // Translation
    //

    // Get the translation for this transform
    // NOTE: you cannot rely on the W value as that will be the scale
    const Vector& GetTranslation() const;

    // Get the translation and scale for this transform
    const Vector& GetTranslationAndScale() const;

    // Set the translation
    void SetTranslation(const Vector& newTranslation);

    // Set the translation and scale simultaneously
    void SetTranslationAndScale(const Vector& newTranslationScale);

    // Add an offset to the current translation
    void AddTranslation(const Vector& translationDelta);

    // Get the translation as a homogeneous-coordinates vector (W=0)
    Vector GetTranslationAsVector() const;

    // Get the translation as a homogeneous-coordinates point (W=1)
    Vector GetTranslationAsPoint() const;

    //
    // Scale
    //

    float GetScale() const;
    Vector GetScaleVector() const;
    Vector GetInverseScaleVector() const;
    void SetScale(float uniformScale);
    bool HasScale() const;
    bool HasNegativeScale() const;

    // This function will sanitize the scale value to remove any
    // trailing digits from scale factors, i.e. 1.000000012 will be converted to 1.
    // This is primarily needed in import steps where scale values
    // might be sampled from curves or have multiple conversions applied, resulting in variance.
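    // A minimal illustrative usage sketch (hypothetical values): an imported
    // transform whose scale picked up floating-point noise is snapped back to
    // rigid by the sanitize call declared below:
    //
    //     Transform t( Quaternion::Identity, Vector::Zero, 1.000000012f );
    //     t.SanitizeScaleValue();          // scale within LargeEpsilon of 1 becomes exactly 1.0f
    //     ASSERT( t.IsRigidTransform() );  // now true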
    void SanitizeScaleValue();

    //
    // Transformations
    //

    Vector TranslateVector(const Vector& vector) const;
    Vector ScaleVector(const Vector& vector) const;
    Vector TransformPoint(const Vector& vector) const;
    Vector TransformPointNoScale(const Vector& vector) const;

    // Rotate a vector (same as TransformVectorNoScale)
    Vector RotateVector(const Vector& vector) const;

    // Rotate a vector (same as TransformVectorNoScale)
    Vector TransformNormal(const Vector& vector) const;

    // Unrotate a vector (same as InverseTransformVectorNoScale)
    Vector InverseRotateVector(const Vector& vector) const;

    // Inverts the operation order when doing the inverse transformation: first translation, then rotation, then scale
    Vector InverseTransformPoint(const Vector& point) const;

    // Inverts the operation order when doing the inverse transformation: first translation, then rotation
    Vector InverseTransformPointNoScale(const Vector& point) const;

    // Applies scale and rotation to a vector (no translation)
    Vector TransformVector(const Vector& vector) const;

    // Rotate a vector
    Vector TransformVectorNoScale(const Vector& vector) const;

    // Inverts the operation order when performing the inverse transformation: first rotation, then scale
    Vector InverseTransformVector(const Vector& vector) const;

    // Unrotate a vector
    Vector InverseTransformVectorNoScale(const Vector& vector) const;

    // WARNING: the results of multiplying transforms with shear or skew are ill-defined
    Transform operator*(const Transform& rhs) const;

    // WARNING: the results of multiplying transforms with shear or skew are ill-defined
    Transform& operator*=(const Transform& rhs);

    //
    // Operators
    //

    bool IsNearEqual(
        const Transform& rhs,
        const Radians angleThreshold = Math::DegreesToRadians,
        float translationScaleThreshold = Math::Epsilon) const;

    // Exact equality
    bool operator==(const Transform& rhs) const;
    bool operator!=(const Transform& rhs) const;

private:
    Quaternion m_rotation = Quaternion(0, 0, 0, 1);
    Vector m_translationScale = Vector(0, 0, 0, 1);
};
}

#include "Transform.inl"

================================================
FILE: MotionCorrection/src/cpp/Math/Transform.inl
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Transform.h" namespace Math { inline Transform Transform::FromRotation(const Quaternion& rotation) { return Transform(rotation); } inline Transform Transform::FromTranslation(const Vector& translation) { return Transform(Quaternion::Identity, translation); } inline Transform Transform::FromScale(float uniformScale) { return Transform(Quaternion::Identity, Vector::Zero, uniformScale); } inline Transform Transform::FromTranslationAndScale(const Vector& translation, float uniformScale) { return Transform(Quaternion::Identity, translation, uniformScale); } inline Transform Transform::FromRotationBetweenVectors(const Vector sourceVector, const Vector targetVector) { return Transform(Quaternion::FromRotationBetweenNormalizedVectors(sourceVector, targetVector)); } inline Transform Transform::Lerp(const Transform& from, const Transform& to, float t) { Quaternion const rotation = Quaternion::NLerp(Quaternion(from.m_rotation), Quaternion(to.m_rotation), t); Vector const translationAndScale = Vector::Lerp(from.m_translationScale, to.m_translationScale, t); Transform lerped(NoInit); lerped.m_rotation = rotation; lerped.m_translationScale = translationAndScale; return lerped; } inline Transform Transform::Slerp(const Transform& from, const Transform& to, float t) { Quaternion const rotation = Quaternion::SLerp(Quaternion(from.m_rotation), Quaternion(to.m_rotation), t); Vector const translationAndScale = Vector::Lerp(Vector(from.m_translationScale), Vector(to.m_translationScale), t); Transform lerped(NoInit); lerped.m_rotation = rotation; lerped.m_translationScale = translationAndScale; return lerped; } inline Transform Transform::FastSlerp(const Transform& from, const Transform& to, float t) { Quaternion const rotation = Quaternion::FastSLerp(Quaternion(from.m_rotation), Quaternion(to.m_rotation), t); Vector const translationAndScale = Vector::Lerp(Vector(from.m_translationScale), Vector(to.m_translationScale), t); Transform lerped(NoInit); lerped.m_rotation = rotation; lerped.m_translationScale = translationAndScale; return lerped; } inline Transform Transform::Delta(const Transform& from, const Transform& to) { ASSERT(from.m_rotation.IsNormalized() && to.m_rotation.IsNormalized()); ASSERT(!from.m_translationScale.IsW0() && !to.m_translationScale.IsW0()); Transform result; Vector const inverseScale = from.GetInverseScaleVector(); Vector const deltaScale = to.GetScaleVector() * inverseScale; // If we have negative scaling, we need to use matrices to calculate the deltas Vector const minScale = Vector::Min(from.m_translationScale.GetSplatW(), to.m_translationScale.GetSplatW()); if (minScale.IsAnyLessThan(Vector::Zero)) { // Multiply the transforms using matrices to get the correct rotation and then remove the scale; Matrix const toMtx = to.ToMatrix(); Matrix const fromMtx = from.ToMatrix(); Matrix resultMtx = toMtx * fromMtx.GetInverse(); resultMtx.RemoveScaleFast(); // Apply back the signs from the final scale Vector const sign = deltaScale.GetSign(); resultMtx[0] *= sign.GetSplatX(); resultMtx[1] *= sign.GetSplatY(); resultMtx[2] *= sign.GetSplatZ(); result.m_rotation = resultMtx.GetRotation(); ASSERT(result.m_rotation.IsNormalized()); result.m_translationScale = Vector::Select(resultMtx.GetTranslation(), deltaScale, Vector::Select0001); } else { Quaternion const fromInverseRotation = from.m_rotation.GetInverse(); result.m_rotation = to.m_rotation * fromInverseRotation; Vector const deltaTranslation = to.m_translationScale - 
from.m_translationScale; Vector const translation = fromInverseRotation.RotateVector(deltaTranslation) * inverseScale; result.m_translationScale = Vector::Select(translation, deltaScale, Vector::Select0001); } return result; } inline Transform Transform::DeltaNoScale(const Transform& from, const Transform& to) { Quaternion const inverseFromRotation = from.m_rotation.GetInverse(); Vector const deltaTranslation = to.GetTranslation() - from.GetTranslation(); Transform delta; delta.m_rotation = to.m_rotation * inverseFromRotation; delta.m_translationScale = inverseFromRotation.RotateVector(deltaTranslation).GetWithW1(); return delta; } inline void Transform::DirectlySetRotation(Transform& transform, Quaternion&& rotation) { transform.m_rotation = rotation; } inline void Transform::DirectlySetRotation(Transform& transform, const Quaternion& rotation) { transform.m_rotation = rotation; } inline void Transform::DirectlySetTranslationScale(Transform& transform, Vector&& translationScale) { transform.m_translationScale = translationScale; } inline void Transform::DirectlySetTranslationScale(Transform& transform, const Vector& translationScale) { transform.m_translationScale = translationScale; } inline Transform::Transform(NoInit_t) : m_rotation(NoInit) , m_translationScale(NoInit) { } inline Transform::Transform(const Matrix& m) { Vector mTranslation, mScale; m.Decompose(m_rotation, mTranslation, mScale); ASSERT(Math::IsNearEqual(mScale.GetX(), mScale.GetY()) && Math::IsNearEqual(mScale.GetY(),mScale.GetZ())); m_translationScale = Vector::Select(mTranslation, mScale, Vector::Select0001); } inline Transform::Transform(const Quaternion& rotation, const Vector& translation, float scale) : m_rotation(rotation) , m_translationScale(Vector::Select(translation, Vector(scale), Vector::Select0001)) { } inline Transform::Transform(const AxisAngle& rotation) : m_rotation(rotation) , m_translationScale(Vector::UnitW) { } inline Matrix Transform::ToMatrix() const { return Matrix(m_rotation, m_translationScale.GetWithW1(), m_translationScale.GetSplatW()); } inline Matrix Transform::ToMatrixNoScale() const { return Matrix(m_rotation, m_translationScale.GetWithW1(), Vector::One); } inline EulerAngles Transform::ToEulerAngles() const { return m_rotation.ToEulerAngles(); } inline Vector Transform::GetAxisX() const { return m_rotation.RotateVector(Vector::UnitX); } inline Vector Transform::GetAxisY() const { return m_rotation.RotateVector(Vector::UnitY); } inline Vector Transform::GetAxisZ() const { return m_rotation.RotateVector(Vector::UnitZ); } inline Vector Transform::GetRightVector() const { return m_rotation.RotateVector(Vector::WorldRight); } inline Vector Transform::GetForwardVector() const { return m_rotation.RotateVector(Vector::WorldForward); } inline Vector Transform::GetUpVector() const { return m_rotation.RotateVector(Vector::WorldUp); } inline bool Transform::IsIdentity() const { return m_rotation.IsIdentity() && m_translationScale.IsEqual4(Vector::UnitW); } inline bool Transform::IsRigidTransform() const { return GetScale() == 1.0f; } inline void Transform::MakeRigidTransform() { SetScale(1.0f); } inline Transform& Transform::Inverse() { ASSERT(!m_translationScale.IsW0()); Quaternion const inverseRotation = m_rotation.GetInverse(); m_rotation = inverseRotation; Vector const inverseScale = GetInverseScaleVector(); Vector const inverselyScaledTranslation = inverseScale * m_translationScale.GetWithW0(); Vector const inverselyRotatedTranslation = inverseRotation.RotateVector(inverselyScaledTranslation); 
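    // A round-trip sketch of the inverse/delta contracts (illustrative; rigid
    // transforms, and assuming this library's composition order, in which
    // (a * b) applies 'a' first and then 'b'):
    //
    //     Transform const delta = Transform::DeltaNoScale( from, to );
    //     ASSERT( ( delta * from ).IsNearEqual( to ) );  // concatenating the delta onto 'from' reaches 'to'
    //     ASSERT( ( t.GetInverse() * t ).IsNearEqual( Transform::Identity ) );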
    Vector const inverseTranslation = inverselyRotatedTranslation.GetNegated().SetW0();
    m_translationScale = Vector::Select(inverseTranslation, inverseScale, Vector::Select0001);
    return *this;
}

inline Transform Transform::GetInverse() const
{
    Transform inverse = *this;
    return inverse.Inverse();
}

inline Transform Transform::GetDeltaToOther(const Transform& targetTransform) const
{
    return Transform::Delta(*this, targetTransform);
}

inline Transform Transform::GetDeltaFromOther(const Transform& startTransform) const
{
    return Transform::Delta(startTransform, *this);
}

inline const Quaternion& Transform::GetRotation() const { return m_rotation; }

inline void Transform::SetRotation(const Quaternion& rotation)
{
    ASSERT(rotation.IsNormalized());
    m_rotation = rotation;
}

inline void Transform::AddRotation(const Quaternion& delta)
{
    ASSERT(delta.IsNormalized());
    m_rotation = delta * m_rotation;
}

inline const Vector& Transform::GetTranslation() const { return m_translationScale; }

inline const Vector& Transform::GetTranslationAndScale() const { return m_translationScale; }

inline void Transform::SetTranslation(const Vector& newTranslation)
{
    m_translationScale = Vector::Select(newTranslation, m_translationScale, Vector::Select0001);
}

inline void Transform::SetTranslationAndScale(const Vector& newTranslationScale)
{
    ASSERT(newTranslationScale.GetW() != 0.0f);
    m_translationScale = newTranslationScale;
}

inline void Transform::AddTranslation(const Vector& translationDelta)
{
    m_translationScale += translationDelta.GetWithW0();
}

inline Vector Transform::GetTranslationAsVector() const { return m_translationScale.GetWithW0(); }

inline Vector Transform::GetTranslationAsPoint() const { return m_translationScale.GetWithW1(); }

inline float Transform::GetScale() const { return m_translationScale.GetW(); }

inline Vector Transform::GetScaleVector() const { return m_translationScale.GetSplatW(); }

inline Vector Transform::GetInverseScaleVector() const { return m_translationScale.GetSplatW().GetInverse(); }

inline void Transform::SetScale(float uniformScale) { m_translationScale.SetW(uniformScale); }

inline bool Transform::HasScale() const { return m_translationScale.GetW() != 1.0f; }

inline bool Transform::HasNegativeScale() const { return m_translationScale.GetW() < 0.0f; }

inline Vector Transform::TranslateVector(const Vector& vector) const
{
    return vector + m_translationScale.GetWithW0();
}

inline Vector Transform::ScaleVector(const Vector& vector) const
{
    return vector * GetScaleVector();
}

inline Vector Transform::TransformPoint(const Vector& point) const
{
    ASSERT(!m_translationScale.IsW0());
    Vector transformedPoint = point * m_translationScale.GetSplatW();
    transformedPoint = (m_translationScale + m_rotation.RotateVector(transformedPoint)).GetWithW0();
    return transformedPoint;
}

inline Vector Transform::TransformPointNoScale(const Vector& point) const
{
    Vector transformedPoint = (m_translationScale + m_rotation.RotateVector(point)).GetWithW0();
    return transformedPoint;
}

inline Vector Transform::RotateVector(const Vector& vector) const { return m_rotation.RotateVector(vector); }

inline Vector Transform::TransformNormal(const Vector& vector) const { return RotateVector(vector); }

inline Vector Transform::InverseRotateVector(const Vector& vector) const { return m_rotation.RotateVectorInverse(vector); }

inline Vector Transform::InverseTransformPoint(const Vector& point) const
{
    ASSERT(!m_translationScale.IsW0());
    Vector const shiftedPoint = point - m_translationScale;
    Vector const unrotatedShiftedPoint =
m_rotation.RotateVectorInverse(shiftedPoint); Vector const inverseScale = GetInverseScaleVector(); Vector const result = unrotatedShiftedPoint * inverseScale; return result; } inline Vector Transform::InverseTransformPointNoScale(const Vector& point) const { Vector const shiftedPoint = point - m_translationScale; Vector const unrotatedShiftedPoint = m_rotation.RotateVectorInverse(shiftedPoint); return unrotatedShiftedPoint; } inline Vector Transform::TransformVector(const Vector& vector) const { ASSERT(!m_translationScale.IsW0()); Vector transformedVector = vector * GetScaleVector(); transformedVector = m_rotation.RotateVector(transformedVector); return transformedVector; } inline Vector Transform::TransformVectorNoScale(const Vector& vector) const { return RotateVector(vector); } inline Vector Transform::InverseTransformVector(const Vector& vector) const { ASSERT(!m_translationScale.IsW0()); Vector const unrotatedVector = m_rotation.RotateVectorInverse(vector); Vector const inverseScale = GetInverseScaleVector(); Vector const result = unrotatedVector * inverseScale; return result; } inline Vector Transform::InverseTransformVectorNoScale(const Vector& vector) const { return m_rotation.RotateVectorInverse(vector); } inline Transform Transform::operator*(const Transform& rhs) const { Transform transform = *this; transform *= rhs; return transform; } inline Transform& Transform::operator*=(const Transform& rhs) { Vector const scale = GetScaleVector(); Vector const rhsScale = rhs.GetScaleVector(); Vector const minScale = Vector::Min(scale, rhsScale); Vector const finalScale = scale * rhsScale; if (minScale.IsAnyLessThan(Vector::Zero)) { // Multiply the transforms using matrices to // get the correct rotation and then remove the scale; Matrix const lhsMtx = ToMatrix(); Matrix const rhsMtx = rhs.ToMatrix(); Matrix resultMtx = lhsMtx * rhsMtx; resultMtx.RemoveScaleFast(); // Apply back the signs from the final scale Vector const sign = finalScale.GetSign(); resultMtx[0] *= sign.GetSplatX(); resultMtx[1] *= sign.GetSplatY(); resultMtx[2] *= sign.GetSplatZ(); m_rotation = resultMtx.GetRotation(); ASSERT(m_rotation.IsNormalized()); m_translationScale = Vector::Select(resultMtx.GetTranslation(), finalScale, Vector::Select0001); } else { // Normal case m_rotation = m_rotation * rhs.m_rotation; m_rotation.Normalize(); Vector const translation = rhs.m_rotation.RotateVector(m_translationScale * rhsScale) + rhs.m_translationScale; m_translationScale = Vector::Select(translation, finalScale, Vector::Select0001); } return *this; } inline bool Transform::IsNearEqual(const Transform& rhs, const Radians angleThreshold, float translationScaleThreshold) const { if (!m_rotation.IsNearEqual(rhs.m_rotation, angleThreshold)) { return false; } if (!m_translationScale.IsNearEqual4(rhs.m_translationScale, translationScaleThreshold)) { return false; } return true; } inline bool Transform::operator==(const Transform& rhs) const { if (m_translationScale != rhs.m_translationScale) { return false; } if (m_rotation != rhs.m_rotation) { return false; } return true; } inline bool Transform::operator!=(const Transform& rhs) const { return !operator==(rhs); } } ================================================ FILE: MotionCorrection/src/cpp/Math/Types.cpp ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 * SPDX-License-Identifier: Apache-2.0
 */

#include "Types.h"

Int2 const Int2::Zero = Int2( 0, 0 );

Int3 const Int3::Zero = Int3( 0, 0, 0 );

Int4 const Int4::Zero = Int4( 0, 0, 0, 0 );
Int4 const Int4::MinusOne = Int4( -1, -1, -1, -1 );

Float2 const Float2::Zero = Float2( 0, 0 );
Float2 const Float2::One = Float2( 1, 1 );
Float2 const Float2::UnitX = Float2( 1, 0 );
Float2 const Float2::UnitY = Float2( 0, 1 );

Float3 const Float3::Zero = Float3( 0, 0, 0 );
Float3 const Float3::One = Float3( 1, 1, 1 );
Float3 const Float3::UnitX = Float3( 1, 0, 0 );
Float3 const Float3::UnitY = Float3( 0, 1, 0 );
Float3 const Float3::UnitZ = Float3( 0, 0, 1 );
Float3 const Float3::WorldForward = Float3( 0, -1, 0 );
Float3 const Float3::WorldUp = Float3( 0, 0, 1 );
Float3 const Float3::WorldRight = Float3( -1, 0, 0 );

Float4 const Float4::Zero = Float4( 0, 0, 0, 0 );
Float4 const Float4::One = Float4( 1, 1, 1, 1 );
Float4 const Float4::UnitX = Float4( 1, 0, 0, 0 );
Float4 const Float4::UnitY = Float4( 0, 1, 0, 0 );
Float4 const Float4::UnitZ = Float4( 0, 0, 1, 0 );
Float4 const Float4::UnitW = Float4( 0, 0, 0, 1 );
Float4 const Float4::WorldForward = Float4( 0, -1, 0, 0 );
Float4 const Float4::WorldUp = Float4( 0, 0, 1, 0 );
Float4 const Float4::WorldRight = Float4( -1, 0, 0, 0 );

Radians const Radians::Pi = Radians( Math::Pi );
Radians const Radians::TwoPi = Radians( Math::TwoPi );
Radians const Radians::OneDivPi = Radians( Math::OneDivPi );
Radians const Radians::OneDivTwoPi = Radians( Math::OneDivTwoPi );
Radians const Radians::PiDivTwo = Radians( Math::PiDivTwo );
Radians const Radians::PiDivFour = Radians( Math::PiDivFour );

================================================
FILE: MotionCorrection/src/cpp/Math/Types.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Scalar.h" enum NoInit_t { NoInit }; enum ZeroInit_t { ZeroInit }; enum IdentityInit_t { IdentityInit }; enum class Axis : uint8_t { X = 0, Y, Z, NegX, NegY, NegZ }; struct Float2; struct Float3; struct Float4; struct Int2 { static Int2 const Zero; public: inline Int2() {} inline Int2( ZeroInit_t ) : m_x( 0 ), m_y( 0 ) {} inline Int2( Float2 const& v ); inline explicit Int2( int32_t v ) : m_x( v ), m_y( v ) {} inline explicit Int2( int32_t ix, int32_t iy ) : m_x( ix ), m_y( iy ) {} inline bool IsZero() const { return *this == Zero; } inline int32_t& operator[]( uint32_t i ) { return ( (int32_t*) this )[i]; } inline int32_t const& operator[]( uint32_t i ) const { return ( (int32_t*) this )[i]; } inline bool operator==( Int2 const rhs ) const { return m_x == rhs.m_x && m_y == rhs.m_y; } inline bool operator!=( Int2 const rhs ) const { return m_x != rhs.m_x || m_y != rhs.m_y; } inline Int2 operator+( Int2 const& rhs ) const { return Int2( m_x + rhs.m_x, m_y + rhs.m_y ); } inline Int2 operator-( Int2 const& rhs ) const { return Int2( m_x - rhs.m_x, m_y - rhs.m_y ); } inline Int2 operator*( Int2 const& rhs ) const { return Int2( m_x * rhs.m_x, m_y * rhs.m_y ); } inline Int2 operator/( Int2 const& rhs ) const { return Int2( m_x / rhs.m_x, m_y / rhs.m_y ); } inline Int2& operator+=( int32_t const& rhs ) { m_x += rhs; m_y += rhs; return *this; } inline Int2& operator-=( int32_t const& rhs ) { m_x -= rhs; m_y -= rhs; return *this; } inline Int2& operator*=( int32_t const& rhs ) { m_x *= rhs; m_y *= rhs; return *this; } inline Int2& operator/=( int32_t const& rhs ) { m_x /= rhs; m_y /= rhs; return *this; } // Component wise operation inline Int2 operator+( int32_t const& rhs ) const { return Int2( m_x + rhs, m_y + rhs ); } inline Int2 operator-( int32_t const& rhs ) const { return Int2( m_x - rhs, m_y - rhs ); } inline Int2 operator*( int32_t const& rhs ) const { return Int2( m_x * rhs, m_y * rhs ); } inline Int2 operator/( int32_t const& rhs ) const { return Int2( m_x / rhs, m_y / rhs ); } inline Int2& operator+=( Int2 const& rhs ) { m_x += rhs.m_x; m_y += rhs.m_y; return *this; } inline Int2& operator-=( Int2 const& rhs ) { m_x -= rhs.m_x; m_y -= rhs.m_y; return *this; } inline Int2& operator*=( Int2 const& rhs ) { m_x *= rhs.m_x; m_y *= rhs.m_y; return *this; } inline Int2& operator/=( Int2 const& rhs ) { m_x /= rhs.m_x; m_y /= rhs.m_y; return *this; } public: int32_t m_x, m_y; }; struct Int3 { static Int3 const Zero; public: inline Int3() {} inline Int3( ZeroInit_t ) : m_x( 0 ), m_y( 0 ), m_z( 0 ) {} inline Int3( Float3 const& v ); inline explicit Int3( int32_t v ) : m_x( v ), m_y( v ), m_z( v ) {} inline explicit Int3( int32_t ix, int32_t iy, int32_t iz ) : m_x( ix ), m_y( iy ), m_z( iz ) {} inline bool IsZero() const { return *this == Zero; } inline int32_t& operator[]( uint32_t i ) { return ( (int32_t*) this )[i]; } inline int32_t const& operator[]( uint32_t i ) const { return ( (int32_t*) this )[i]; } inline bool operator==( Int3 const rhs ) const { return m_x == rhs.m_x && m_y == rhs.m_y && m_z == rhs.m_z; } inline bool operator!=( Int3 const rhs ) const { return m_x != rhs.m_x || m_y != rhs.m_y || m_z != rhs.m_z; } inline Int3 operator+( Int3 const& rhs ) const { return Int3( m_x + rhs.m_x, m_y + rhs.m_y, m_z + rhs.m_z ); } inline Int3 operator-( Int3 const& rhs ) const { return Int3( m_x - rhs.m_x, m_y - rhs.m_y, m_z - rhs.m_z ); } inline Int3 operator*( Int3 const& rhs ) const { return Int3( m_x * rhs.m_x, m_y * rhs.m_y, m_z 
* rhs.m_z ); } inline Int3 operator/( Int3 const& rhs ) const { return Int3( m_x / rhs.m_x, m_y / rhs.m_y, m_z / rhs.m_z ); } inline Int3& operator+=( int32_t const& rhs ) { m_x += rhs; m_y += rhs; m_z += rhs; return *this; } inline Int3& operator-=( int32_t const& rhs ) { m_x -= rhs; m_y -= rhs; m_z -= rhs; return *this; } inline Int3& operator*=( int32_t const& rhs ) { m_x *= rhs; m_y *= rhs; m_z *= rhs; return *this; } inline Int3& operator/=( int32_t const& rhs ) { m_x /= rhs; m_y /= rhs; m_z /= rhs; return *this; } // Component wise operation inline Int3 operator+( int32_t const& rhs ) const { return Int3( m_x + rhs, m_y + rhs, m_z + rhs ); } inline Int3 operator-( int32_t const& rhs ) const { return Int3( m_x - rhs, m_y - rhs, m_z - rhs ); } inline Int3 operator*( int32_t const& rhs ) const { return Int3( m_x * rhs, m_y * rhs, m_z * rhs ); } inline Int3 operator/( int32_t const& rhs ) const { return Int3( m_x / rhs, m_y / rhs, m_z / rhs ); } inline Int3& operator+=( Int3 const& rhs ) { m_x += rhs.m_x; m_y += rhs.m_y; m_z += rhs.m_z; return *this; } inline Int3& operator-=( Int3 const& rhs ) { m_x -= rhs.m_x; m_y -= rhs.m_y; m_z -= rhs.m_z; return *this; } inline Int3& operator*=( Int3 const& rhs ) { m_x *= rhs.m_x; m_y *= rhs.m_y; m_z *= rhs.m_z; return *this; } inline Int3& operator/=( Int3 const& rhs ) { m_x /= rhs.m_x; m_y /= rhs.m_y; m_z /= rhs.m_z; return *this; } public: int32_t m_x, m_y, m_z; }; struct Int4 { static Int4 const Zero; static Int4 const MinusOne; public: inline Int4() {} inline Int4( ZeroInit_t ) : m_x( 0 ), m_y( 0 ), m_z( 0 ), m_w( 0 ) {} inline explicit Int4( int32_t v ) : m_x( v ), m_y( v ), m_z( v ), m_w( v ) {} inline explicit Int4( int32_t ix, int32_t iy, int32_t iz, int32_t iw ) : m_x( ix ), m_y( iy ), m_z( iz ), m_w( iw ) {} inline bool IsZero() const { return *this == Zero; } inline int32_t& operator[]( uint32_t i ) { return ( (int32_t*) this )[i]; } inline int32_t const& operator[]( uint32_t i ) const { return ( (int32_t*) this )[i]; } inline bool operator==( Int4 const rhs ) const { return m_x == rhs.m_x && m_y == rhs.m_y && m_z == rhs.m_z && m_w == rhs.m_w; } inline bool operator!=( Int4 const rhs ) const { return m_x != rhs.m_x || m_y != rhs.m_y || m_z != rhs.m_z || m_w != rhs.m_w; } inline Int4 operator+( int32_t const& rhs ) const { return Int4( m_x + rhs, m_y + rhs, m_z + rhs, m_w + rhs ); } inline Int4 operator-( int32_t const& rhs ) const { return Int4( m_x - rhs, m_y - rhs, m_z - rhs, m_w - rhs ); } inline Int4 operator*( int32_t const& rhs ) const { return Int4( m_x * rhs, m_y * rhs, m_z * rhs, m_w * rhs ); } inline Int4 operator/( int32_t const& rhs ) const { return Int4( m_x / rhs, m_y / rhs, m_z / rhs, m_w / rhs ); } inline Int4& operator+=( int32_t const& rhs ) { m_x += rhs; m_y += rhs; m_z += rhs; m_w += rhs; return *this; } inline Int4& operator-=( int32_t const& rhs ) { m_x -= rhs; m_y -= rhs; m_z -= rhs; m_w -= rhs; return *this; } inline Int4& operator*=( int32_t const& rhs ) { m_x *= rhs; m_y *= rhs; m_z *= rhs; m_w *= rhs; return *this; } inline Int4& operator/=( int32_t const& rhs ) { m_x /= rhs; m_y /= rhs; m_z /= rhs; m_w /= rhs; return *this; } // Component wise operation inline Int4 operator+( Int4 const& rhs ) const { return Int4( m_x + rhs.m_x, m_y + rhs.m_y, m_z + rhs.m_z, m_w + rhs.m_w ); } inline Int4 operator-( Int4 const& rhs ) const { return Int4( m_x - rhs.m_x, m_y - rhs.m_y, m_z - rhs.m_z, m_w - rhs.m_w ); } inline Int4 operator*( Int4 const& rhs ) const { return Int4( m_x * rhs.m_x, m_y * rhs.m_y, m_z * rhs.m_z, m_w 
* rhs.m_w ); } inline Int4 operator/( Int4 const& rhs ) const { return Int4( m_x / rhs.m_x, m_y / rhs.m_y, m_z / rhs.m_z, m_w / rhs.m_w ); } inline Int4& operator+=( Int4 const& rhs ) { m_x += rhs.m_x; m_y += rhs.m_y; m_z += rhs.m_z; m_w += rhs.m_w; return *this; } inline Int4& operator-=( Int4 const& rhs ) { m_x -= rhs.m_x; m_y -= rhs.m_y; m_z -= rhs.m_z; m_w -= rhs.m_w; return *this; } inline Int4& operator*=( Int4 const& rhs ) { m_x *= rhs.m_x; m_y *= rhs.m_y; m_z *= rhs.m_z; m_w *= rhs.m_w; return *this; } inline Int4& operator/=( Int4 const& rhs ) { m_x /= rhs.m_x; m_y /= rhs.m_y; m_z /= rhs.m_z; m_w /= rhs.m_w; return *this; } public: int32_t m_x, m_y, m_z, m_w; }; struct Float2 { static Float2 const Zero; static Float2 const One; static Float2 const UnitX; static Float2 const UnitY; public: inline Float2() {} FORCE_INLINE Float2( ZeroInit_t ) : m_x( 0 ), m_y( 0 ) {} FORCE_INLINE explicit Float2( float v ) : m_x( v ), m_y( v ) {} FORCE_INLINE explicit Float2( float ix, float iy ) : m_x( ix ), m_y( iy ) {} FORCE_INLINE explicit Float2( int32_t ix, int32_t iy ) : m_x( (float) ix ), m_y( (float) iy ) {} inline explicit Float2( Int2 const& v ) : m_x( (float) v.m_x ), m_y( (float) v.m_y ) {} inline explicit Float2( Float3 const& v ); inline explicit Float2( Float4 const& v ); inline bool IsZero() const { return *this == Zero; } inline float& operator[]( uint32_t i ) { return ( (float*) this )[i]; } inline float const& operator[]( uint32_t i ) const { return ( (float*) this )[i]; } FORCE_INLINE Float2 operator-() const { return Float2( -m_x, -m_y ); } inline bool operator==( Float2 const rhs ) const { return m_x == rhs.m_x && m_y == rhs.m_y; } inline bool operator!=( Float2 const rhs ) const { return m_x != rhs.m_x || m_y != rhs.m_y; } inline Float2 operator+( Float2 const& rhs ) const { return Float2( m_x + rhs.m_x, m_y + rhs.m_y ); } inline Float2 operator-( Float2 const& rhs ) const { return Float2( m_x - rhs.m_x, m_y - rhs.m_y ); } inline Float2 operator*( Float2 const& rhs ) const { return Float2( m_x * rhs.m_x, m_y * rhs.m_y ); } inline Float2 operator/( Float2 const& rhs ) const { return Float2( m_x / rhs.m_x, m_y / rhs.m_y ); } inline Float2 operator+( float const& rhs ) const { return Float2( m_x + rhs, m_y + rhs ); } inline Float2 operator-( float const& rhs ) const { return Float2( m_x - rhs, m_y - rhs ); } inline Float2 operator*( float const& rhs ) const { return Float2( m_x * rhs, m_y * rhs ); } inline Float2 operator/( float const& rhs ) const { return Float2( m_x / rhs, m_y / rhs ); } inline Float2& operator+=( Float2 const& rhs ) { m_x += rhs.m_x; m_y += rhs.m_y; return *this; } inline Float2& operator-=( Float2 const& rhs ) { m_x -= rhs.m_x; m_y -= rhs.m_y; return *this; } inline Float2& operator*=( Float2 const& rhs ) { m_x *= rhs.m_x; m_y *= rhs.m_y; return *this; } inline Float2& operator/=( Float2 const& rhs ) { m_x /= rhs.m_x; m_y /= rhs.m_y; return *this; } inline Float2& operator+=( float const& rhs ) { m_x += rhs; m_y += rhs; return *this; } inline Float2& operator-=( float const& rhs ) { m_x -= rhs; m_y -= rhs; return *this; } inline Float2& operator*=( float const& rhs ) { m_x *= rhs; m_y *= rhs; return *this; } inline Float2& operator/=( float const& rhs ) { m_x /= rhs; m_y /= rhs; return *this; } float m_x, m_y; }; struct Float3 { static Float3 const Zero; static Float3 const One; static Float3 const UnitX; static Float3 const UnitY; static Float3 const UnitZ; static Float3 const WorldForward; static Float3 const WorldUp; static Float3 const WorldRight; 
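    // The world-axis constants declared above (values in Types.cpp) encode this
    // library's Z-up convention: WorldUp = +Z, WorldForward = -Y, WorldRight = -X.
    // An illustrative sanity check that the triple forms a right-handed frame:
    //
    //     cross( WorldRight, WorldForward )
    //         = ( -1, 0, 0 ) x ( 0, -1, 0 )
    //         = ( 0*0 - 0*(-1), 0*0 - (-1)*0, (-1)*(-1) - 0*0 )
    //         = ( 0, 0, 1 ) == WorldUp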
public: inline Float3() {} FORCE_INLINE Float3( ZeroInit_t ) : m_x( 0 ), m_y( 0 ), m_z( 0 ) {} FORCE_INLINE explicit Float3( float v ) : m_x( v ), m_y( v ), m_z( v ) {} FORCE_INLINE explicit Float3( float ix, float iy, float iz ) : m_x( ix ), m_y( iy ), m_z( iz ) {} inline explicit Float3( Float2 const& v, float iz = 0.0f ) : m_x( v.m_x ), m_y( v.m_y ), m_z( iz ) {} inline explicit Float3( Float4 const& v ); inline bool IsZero() const { return *this == Zero; } inline float& operator[]( uint32_t i ) { return ( (float*) this )[i]; } inline float const& operator[]( uint32_t i ) const { return ( (float*) this )[i]; } FORCE_INLINE Float3 operator-() const { return Float3( -m_x, -m_y, -m_z ); } inline bool operator==( Float3 const rhs ) const { return m_x == rhs.m_x && m_y == rhs.m_y && m_z == rhs.m_z; } inline bool operator!=( Float3 const rhs ) const { return m_x != rhs.m_x || m_y != rhs.m_y || m_z != rhs.m_z; } inline operator Float2() const { return Float2( m_x, m_y ); } inline Float3 operator+( Float3 const& rhs ) const { return Float3( m_x + rhs.m_x, m_y + rhs.m_y, m_z + rhs.m_z ); } inline Float3 operator-( Float3 const& rhs ) const { return Float3( m_x - rhs.m_x, m_y - rhs.m_y, m_z - rhs.m_z ); } inline Float3 operator*( Float3 const& rhs ) const { return Float3( m_x * rhs.m_x, m_y * rhs.m_y, m_z * rhs.m_z ); } inline Float3 operator/( Float3 const& rhs ) const { return Float3( m_x / rhs.m_x, m_y / rhs.m_y, m_z / rhs.m_z ); } inline Float3 operator+( float const& rhs ) const { return Float3( m_x + rhs, m_y + rhs, m_z + rhs ); } inline Float3 operator-( float const& rhs ) const { return Float3( m_x - rhs, m_y - rhs, m_z - rhs ); } inline Float3 operator*( float const& rhs ) const { return Float3( m_x * rhs, m_y * rhs, m_z * rhs ); } inline Float3 operator/( float const& rhs ) const { return Float3( m_x / rhs, m_y / rhs, m_z / rhs ); } inline Float3& operator+=( Float3 const& rhs ) { m_x += rhs.m_x; m_y += rhs.m_y; m_z += rhs.m_z; return *this; } inline Float3& operator-=( Float3 const& rhs ) { m_x -= rhs.m_x; m_y -= rhs.m_y; m_z -= rhs.m_z; return *this; } inline Float3& operator*=( Float3 const& rhs ) { m_x *= rhs.m_x; m_y *= rhs.m_y; m_z *= rhs.m_z; return *this; } inline Float3& operator/=( Float3 const& rhs ) { m_x /= rhs.m_x; m_y /= rhs.m_y; m_z /= rhs.m_z; return *this; } inline Float3& operator+=( float const& rhs ) { m_x += rhs; m_y += rhs; m_z += rhs; return *this; } inline Float3& operator-=( float const& rhs ) { m_x -= rhs; m_y -= rhs; m_z -= rhs; return *this; } inline Float3& operator*=( float const& rhs ) { m_x *= rhs; m_y *= rhs; m_z *= rhs; return *this; } inline Float3& operator/=( float const& rhs ) { m_x /= rhs; m_y /= rhs; m_z /= rhs; return *this; } float m_x, m_y, m_z; }; struct Float4 { static Float4 const Zero; static Float4 const One; static Float4 const UnitX; static Float4 const UnitY; static Float4 const UnitZ; static Float4 const UnitW; static Float4 const WorldForward; static Float4 const WorldUp; static Float4 const WorldRight; public: Float4() {} FORCE_INLINE Float4( ZeroInit_t ) : m_x( 0 ), m_y( 0 ), m_z( 0 ), m_w( 0 ) {} FORCE_INLINE explicit Float4( float v ) : m_x( v ), m_y( v ), m_z( v ), m_w( v ) {} FORCE_INLINE explicit Float4( float ix, float iy, float iz, float iw ) : m_x( ix ), m_y( iy ), m_z( iz ), m_w( iw ) {} explicit Float4( Float2 const& v, float iz = 0.0f, float iw = 0.0f ) : m_x( v.m_x ), m_y( v.m_y ), m_z( iz ), m_w( iw ) {} explicit Float4( Float3 const& v, float iw = 0.0f ) : m_x( v.m_x ), m_y( v.m_y ), m_z( v.m_z ), m_w( iw ) {} inline 
bool IsZero() const { return *this == Zero; } float& operator[]( uint32_t i ) { return ( (float*) this )[i]; } float const& operator[]( uint32_t i ) const { return ( (float*) this )[i]; } FORCE_INLINE Float4 operator-() const { return Float4( -m_x, -m_y, -m_z, -m_w ); } bool operator==( Float4 const rhs ) const { return m_x == rhs.m_x && m_y == rhs.m_y && m_z == rhs.m_z && m_w == rhs.m_w; } bool operator!=( Float4 const rhs ) const { return m_x != rhs.m_x || m_y != rhs.m_y || m_z != rhs.m_z || m_w != rhs.m_w; } inline operator Float2() const { return Float2( m_x, m_y ); } inline operator Float3() const { return Float3( m_x, m_y, m_z ); } inline Float4 operator+( Float4 const& rhs ) const { return Float4( m_x + rhs.m_x, m_y + rhs.m_y, m_z + rhs.m_z, m_w + rhs.m_w ); } inline Float4 operator-( Float4 const& rhs ) const { return Float4( m_x - rhs.m_x, m_y - rhs.m_y, m_z - rhs.m_z, m_w - rhs.m_w ); } inline Float4 operator*( Float4 const& rhs ) const { return Float4( m_x * rhs.m_x, m_y * rhs.m_y, m_z * rhs.m_z, m_w * rhs.m_w ); } inline Float4 operator/( Float4 const& rhs ) const { return Float4( m_x / rhs.m_x, m_y / rhs.m_y, m_z / rhs.m_z, m_w / rhs.m_w ); } inline Float4 operator+( float const& rhs ) const { return Float4( m_x + rhs, m_y + rhs, m_z + rhs, m_w + rhs ); } inline Float4 operator-( float const& rhs ) const { return Float4( m_x - rhs, m_y - rhs, m_z - rhs, m_w - rhs ); } inline Float4 operator*( float const& rhs ) const { return Float4( m_x * rhs, m_y * rhs, m_z * rhs, m_w * rhs ); } inline Float4 operator/( float const& rhs ) const { return Float4( m_x / rhs, m_y / rhs, m_z / rhs, m_w / rhs ); } inline Float4& operator+=( Float4 const& rhs ) { m_x += rhs.m_x; m_y += rhs.m_y; m_z += rhs.m_z; m_w += rhs.m_w; return *this; } inline Float4& operator-=( Float4 const& rhs ) { m_x -= rhs.m_x; m_y -= rhs.m_y; m_z -= rhs.m_z; m_w -= rhs.m_w; return *this; } inline Float4& operator*=( Float4 const& rhs ) { m_x *= rhs.m_x; m_y *= rhs.m_y; m_z *= rhs.m_z; m_w *= rhs.m_w; return *this; } inline Float4& operator/=( Float4 const& rhs ) { m_x /= rhs.m_x; m_y /= rhs.m_y; m_z /= rhs.m_z; m_w /= rhs.m_w; return *this; } inline Float4& operator+=( float const& rhs ) { m_x += rhs; m_y += rhs; m_z += rhs; m_w += rhs; return *this; } inline Float4& operator-=( float const& rhs ) { m_x -= rhs; m_y -= rhs; m_z -= rhs; m_w -= rhs; return *this; } inline Float4& operator*=( float const& rhs ) { m_x *= rhs; m_y *= rhs; m_z *= rhs; m_w *= rhs; return *this; } inline Float4& operator/=( float const& rhs ) { m_x /= rhs; m_y /= rhs; m_z /= rhs; m_w /= rhs; return *this; } float m_x, m_y, m_z, m_w; }; inline Int2::Int2( Float2 const& v ) : m_x( (int32_t) v.m_x ) , m_y( (int32_t) v.m_y ) { } inline Int3::Int3( Float3 const& v ) : m_x( (int32_t) v.m_x ) , m_y( (int32_t) v.m_y ) , m_z( (int32_t) v.m_z ) { } inline Float2::Float2( Float3 const& v ) : m_x( v.m_x ) , m_y( v.m_y ) { } inline Float2::Float2( Float4 const& v ) : m_x( v.m_x ) , m_y( v.m_y ) { } inline Float3::Float3( Float4 const& v ) : m_x( v.m_x ) , m_y( v.m_y ) , m_z( v.m_z ) { } struct Radians; struct Degrees; struct Degrees { public: inline Degrees() = default; inline Degrees( float degrees ) : m_value( degrees ) {} inline explicit Degrees( Radians const& radians ); FORCE_INLINE explicit operator float() const { return m_value; } FORCE_INLINE operator Radians() const; FORCE_INLINE float ToFloat() const { return m_value; } FORCE_INLINE Radians ToRadians() const; inline Degrees operator-() const { return Degrees( -m_value ); } inline Degrees 
operator+( Degrees const& rhs ) const { return Degrees( m_value + rhs.m_value ); } inline Degrees operator-( Degrees const& rhs ) const { return Degrees( m_value - rhs.m_value ); } inline Degrees operator*( Degrees const& rhs ) const { return Degrees( m_value * rhs.m_value ); } inline Degrees operator/( Degrees const& rhs ) const { return Degrees( m_value / rhs.m_value ); } inline Degrees& operator+=( Degrees const& rhs ) { m_value += rhs.m_value; return *this; } inline Degrees& operator-=( Degrees const& rhs ) { m_value -= rhs.m_value; return *this; } inline Degrees& operator*=( Degrees const& rhs ) { m_value *= rhs.m_value; return *this; } inline Degrees& operator/=( Degrees const& rhs ) { m_value /= rhs.m_value; return *this; } inline Degrees operator+( float const& rhs ) const { return Degrees( m_value + rhs ); } inline Degrees operator-( float const& rhs ) const { return Degrees( m_value - rhs ); } inline Degrees operator*( float const& rhs ) const { return Degrees( m_value * rhs ); } inline Degrees operator/( float const& rhs ) const { return Degrees( m_value / rhs ); } inline Degrees& operator+=( float const& rhs ) { m_value += rhs; return *this; } inline Degrees& operator-=( float const& rhs ) { m_value -= rhs; return *this; } inline Degrees& operator*=( float const& rhs ) { m_value *= rhs; return *this; } inline Degrees& operator/=( float const& rhs ) { m_value /= rhs; return *this; } inline Degrees operator+( int32_t const& rhs ) const { return Degrees( m_value + rhs ); } inline Degrees operator-( int32_t const& rhs ) const { return Degrees( m_value - rhs ); } inline Degrees operator*( int32_t const& rhs ) const { return Degrees( m_value * rhs ); } inline Degrees operator/( int32_t const& rhs ) const { return Degrees( m_value / rhs ); } inline Degrees& operator+=( int32_t const& rhs ) { m_value += rhs; return *this; } inline Degrees& operator-=( int32_t const& rhs ) { m_value -= rhs; return *this; } inline Degrees& operator*=( int32_t const& rhs ) { m_value *= rhs; return *this; } inline Degrees& operator/=( int32_t const& rhs ) { m_value /= rhs; return *this; } inline Degrees operator+( uint32_t const& rhs ) const { return Degrees( m_value + rhs ); } inline Degrees operator-( uint32_t const& rhs ) const { return Degrees( m_value - rhs ); } inline Degrees operator*( uint32_t const& rhs ) const { return Degrees( m_value * rhs ); } inline Degrees operator/( uint32_t const& rhs ) const { return Degrees( m_value / rhs ); } inline Degrees& operator+=( uint32_t const& rhs ) { m_value += rhs; return *this; } inline Degrees& operator-=( uint32_t const& rhs ) { m_value -= rhs; return *this; } inline Degrees& operator*=( uint32_t const& rhs ) { m_value *= rhs; return *this; } inline Degrees& operator/=( uint32_t const& rhs ) { m_value /= rhs; return *this; } inline bool operator>( float const& rhs ) const { return m_value > rhs; }; inline bool operator<( float const& rhs ) const { return m_value < rhs; } inline bool operator>=( float const& rhs ) const { return m_value >= rhs; } inline bool operator<=( float const& rhs ) const { return m_value <= rhs; } inline bool operator>( Degrees const& rhs ) const { return m_value > rhs.m_value; } inline bool operator<( Degrees const& rhs ) const { return m_value < rhs.m_value; } inline bool operator>=( Degrees const& rhs ) const { return m_value >= rhs.m_value; } inline bool operator<=( Degrees const& rhs ) const { return m_value <= rhs.m_value; } inline bool operator>( Radians const& rhs ) const; inline bool operator<( Radians const& rhs ) const; 
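    // Worked examples for the wrapping helpers defined below (illustrative values):
    //
    //     Degrees d( 725.0f );
    //     d.Clamp360();                    // 725 - 2 * 360           -> 5 degrees
    //     Degrees e( 190.0f );
    //     e.Clamp180();                    // wrapped past +/-180     -> -170 degrees
    //     Radians r( Math::Pi * 0.75f );   // 135 degrees
    //     r.Flip();                        // front/rear arc flipped  -> -Pi/4 (-45 degrees)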
inline bool operator>=( Radians const& rhs ) const; inline bool operator<=( Radians const& rhs ) const; inline bool operator==( float const& v ) const { return Math::IsNearEqual( m_value, v ); } inline bool operator!=( float const& v ) const { return !Math::IsNearEqual( m_value, v ); } inline bool operator==( Degrees const& rhs ) const { return m_value == rhs.m_value; } inline bool operator!=( Degrees const& rhs ) const { return m_value != rhs.m_value; } inline bool operator==( Radians const& rhs ) const; inline bool operator!=( Radians const& rhs ) const; inline void Clamp( Degrees min, Degrees max ) { m_value = Math::Clamp( m_value, min.m_value, max.m_value ); } // Clamps between -360 and 360 inline void Clamp360() { m_value -= ( int32_t( m_value / 360.0f ) * 360.0f ); } // Clamps between -360 and 360 inline Degrees GetClamped360() const { Degrees d( m_value ); d.Clamp360(); return d; } // Clamps to -180 to 180 inline void Clamp180() { Clamp360(); float delta = 180 - Math::Abs( m_value ); if ( delta < 0 ) { delta += 180; m_value = ( m_value < 0 ) ? delta : -delta; } } // Clamps to -180 to 180 inline Degrees GetClamped180() const { Degrees r( m_value ); r.Clamp180(); return r; } // Clamps between 0 to 360 inline Degrees& ClampPositive360() { Clamp360(); if ( m_value < 0 ) { m_value += 360; } return *this; } // Clamps between 0 to 360 inline Degrees GetClampedPositive360() const { Degrees d( m_value ); d.ClampPositive360(); return d; } private: float m_value = 0; }; struct Radians { static Radians const Pi; static Radians const TwoPi; static Radians const OneDivPi; static Radians const OneDivTwoPi; static Radians const PiDivTwo; static Radians const PiDivFour; public: inline Radians() = default; inline Radians( float radians ) : m_value( radians ) {} inline explicit Radians( Degrees const& degrees ); FORCE_INLINE explicit operator float() const { return m_value; } FORCE_INLINE operator Degrees() const { return ToDegrees(); } FORCE_INLINE float ToFloat() const { return m_value; } FORCE_INLINE Degrees ToDegrees() const { return Degrees( m_value * Math::RadiansToDegrees ); } inline Radians operator-() const { return Radians( -m_value ); } inline Radians operator+( Radians const& rhs ) const { return Radians( m_value + rhs.m_value ); } inline Radians operator-( Radians const& rhs ) const { return Radians( m_value - rhs.m_value ); } inline Radians operator*( Radians const& rhs ) const { return Radians( m_value * rhs.m_value ); } inline Radians operator/( Radians const& rhs ) const { return Radians( m_value / rhs.m_value ); } inline Radians& operator+=( Radians const& rhs ) { m_value += rhs.m_value; return *this; } inline Radians& operator-=( Radians const& rhs ) { m_value -= rhs.m_value; return *this; } inline Radians& operator*=( Radians const& rhs ) { m_value *= rhs.m_value; return *this; } inline Radians& operator/=( Radians const& rhs ) { m_value /= rhs.m_value; return *this; } inline Radians operator+( float const& rhs ) const { return Radians( m_value + rhs ); } inline Radians operator-( float const& rhs ) const { return Radians( m_value - rhs ); } inline Radians operator*( float const& rhs ) const { return Radians( m_value * rhs ); } inline Radians operator/( float const& rhs ) const { return Radians( m_value / rhs ); } inline Radians& operator+=( float const& rhs ) { m_value += rhs; return *this; } inline Radians& operator-=( float const& rhs ) { m_value -= rhs; return *this; } inline Radians& operator*=( float const& rhs ) { m_value *= rhs; return *this; } inline Radians& operator/=( 
float const& rhs ) { m_value /= rhs; return *this; } inline Radians operator+( int32_t const& rhs ) const { return Radians( m_value + rhs ); } inline Radians operator-( int32_t const& rhs ) const { return Radians( m_value - rhs ); } inline Radians operator*( int32_t const& rhs ) const { return Radians( m_value * rhs ); } inline Radians operator/( int32_t const& rhs ) const { return Radians( m_value / rhs ); } inline Radians& operator+=( int32_t const& rhs ) { m_value += rhs; return *this; } inline Radians& operator-=( int32_t const& rhs ) { m_value -= rhs; return *this; } inline Radians& operator*=( int32_t const& rhs ) { m_value *= rhs; return *this; } inline Radians& operator/=( int32_t const& rhs ) { m_value /= rhs; return *this; } inline Radians operator+( uint32_t const& rhs ) const { return Radians( m_value + rhs ); } inline Radians operator-( uint32_t const& rhs ) const { return Radians( m_value - rhs ); } inline Radians operator*( uint32_t const& rhs ) const { return Radians( m_value * rhs ); } inline Radians operator/( uint32_t const& rhs ) const { return Radians( m_value / rhs ); } inline Radians& operator+=( uint32_t const& rhs ) { m_value += rhs; return *this; } inline Radians& operator-=( uint32_t const& rhs ) { m_value -= rhs; return *this; } inline Radians& operator*=( uint32_t const& rhs ) { m_value *= rhs; return *this; } inline Radians& operator/=( uint32_t const& rhs ) { m_value /= rhs; return *this; } inline bool operator>( float const& rhs ) const { return m_value > rhs; }; inline bool operator<( float const& rhs ) const { return m_value < rhs; } inline bool operator>=( float const& rhs ) const { return m_value >= rhs; } inline bool operator<=( float const& rhs ) const { return m_value <= rhs; } inline bool operator>( Radians const& rhs ) const { return m_value > rhs.m_value; } inline bool operator<( Radians const& rhs ) const { return m_value < rhs.m_value; } inline bool operator>=( Radians const& rhs ) const { return m_value >= rhs.m_value; } inline bool operator<=( Radians const& rhs ) const { return m_value <= rhs.m_value; } inline bool operator>( Degrees const& rhs ) const; inline bool operator<( Degrees const& rhs ) const; inline bool operator>=( Degrees const& rhs ) const; inline bool operator<=( Degrees const& rhs ) const; inline bool operator==( float const& v ) const { return Math::IsNearEqual( m_value, v ); } inline bool operator!=( float const& v ) const { return !Math::IsNearEqual( m_value, v ); } inline bool operator==( Radians const& rhs ) const { return m_value == rhs.m_value; } inline bool operator!=( Radians const& rhs ) const { return m_value != rhs.m_value; } inline bool operator==( Degrees const& rhs ) const; inline bool operator!=( Degrees const& rhs ) const; inline void Clamp( Radians min, Radians max ) { m_value = Math::Clamp( m_value, min.m_value, max.m_value ); } // Clamps between -2Pi to 2Pi inline void Clamp360() { m_value -= int32_t( m_value / Math::TwoPi ) * Math::TwoPi; } // Clamps between -2Pi to 2Pi inline Radians GetClamped360() const { Radians r( m_value ); r.Clamp360(); return r; } // Clamps between 0 to 2Pi inline void ClampPositive360() { Clamp360(); if( m_value < 0 ) { m_value += Math::TwoPi; } } // Clamps between 0 to 2Pi inline Radians GetClampedToPositive360() const { Radians r( m_value ); r.ClampPositive360(); return r; } // Clamps to -Pi to Pi inline void Clamp180() { Clamp360(); float delta = Math::Pi - Math::Abs( m_value ); if ( delta < 0 ) { delta += Math::Pi; m_value = ( m_value < 0 ) ? 
delta : -delta; } } // Clamps to -Pi to Pi inline Radians GetClamped180() const { Radians r( m_value ); r.Clamp180(); return r; } // Inverts angle between [0;2Pi] and [-2Pi;0] inline void Invert() { Clamp360(); float const delta = Math::TwoPi - Math::Abs( m_value ); m_value = ( m_value < 0 ) ? delta : -delta; } // Inverts angle between [0;2Pi] and [-2Pi;0] inline Radians GetInverse() const { Radians r( m_value ); r.Invert(); return r; } // Flips the front and rear 180 degree arc i.e. 135 becomes -45, -90 becomes 90, etc. inline void Flip() { Clamp180(); float const delta = Math::Pi - Math::Abs( m_value ); m_value = ( m_value < 0 ) ? delta : -delta; } // Flips the front and rear 180 degree arc i.e. 135 becomes -45, -90 becomes 90, etc. inline Radians GetFlipped() const { Radians r( m_value ); r.Flip(); return r; } private: float m_value = 0; }; inline Degrees::Degrees( Radians const& radians ) : m_value( radians.ToDegrees() ) {} inline Radians Degrees::ToRadians() const { return Radians( m_value * Math::DegreesToRadians ); } inline Degrees::operator Radians() const { return ToRadians(); } inline bool Degrees::operator>( Radians const& rhs ) const { return m_value > rhs.ToDegrees().m_value; } inline bool Degrees::operator<( Radians const& rhs ) const { return m_value < rhs.ToDegrees().m_value; } inline bool Degrees::operator>=( Radians const& rhs ) const { return m_value >= rhs.ToDegrees().m_value; } inline bool Degrees::operator<=( Radians const& rhs ) const { return m_value <= rhs.ToDegrees().m_value; } inline bool Degrees::operator==( Radians const& rhs ) const { return Math::IsNearEqual( m_value, rhs.ToDegrees().m_value ); } inline bool Degrees::operator!=( Radians const& rhs ) const { return !Math::IsNearEqual( m_value, rhs.ToDegrees().m_value ); } inline Radians::Radians( Degrees const& degrees ) : m_value( degrees.ToRadians() ) {} inline bool Radians::operator>( Degrees const& rhs ) const { return m_value > rhs.ToRadians().m_value; } inline bool Radians::operator<( Degrees const& rhs ) const { return m_value < rhs.ToRadians().m_value; } inline bool Radians::operator>=( Degrees const& rhs ) const { return m_value >= rhs.ToRadians().m_value; } inline bool Radians::operator<=( Degrees const& rhs ) const { return m_value <= rhs.ToRadians().m_value; } inline bool Radians::operator==( Degrees const& rhs ) const { return Math::IsNearEqual( m_value, rhs.ToRadians().m_value ); } inline bool Radians::operator!=( Degrees const& rhs ) const { return !Math::IsNearEqual( m_value, rhs.ToRadians().m_value ); } struct EulerAngles { public: EulerAngles() = default; inline explicit EulerAngles( Degrees inX, Degrees inY, Degrees inZ ) : m_x( inX ) , m_y( inY ) , m_z( inZ ) {} inline explicit EulerAngles( Radians inX, Radians inY, Radians inZ ) : m_x( inX ) , m_y( inY ) , m_z( inZ ) {} inline explicit EulerAngles( float inDegreesX, float inDegreesY, float inDegreesZ ) : m_x( Math::DegreesToRadians * inDegreesX ) , m_y( Math::DegreesToRadians * inDegreesY ) , m_z( Math::DegreesToRadians * inDegreesZ ) {} inline EulerAngles( Float3 const& anglesInDegrees ) : m_x( Math::DegreesToRadians * anglesInDegrees.m_x ) , m_y( Math::DegreesToRadians * anglesInDegrees.m_y ) , m_z( Math::DegreesToRadians * anglesInDegrees.m_z ) {} inline void Clamp() { m_x.Clamp360(); m_y.Clamp360(); m_z.Clamp360(); } inline EulerAngles GetClamped() const { EulerAngles clamped = *this; clamped.Clamp(); return clamped; } inline Radians GetYaw() const { return m_z; } inline Radians GetPitch() const { return m_x; } inline Radians GetRoll() 
const { return m_y; } inline Float3 GetAsRadians() const { return Float3( m_x.ToFloat(), m_y.ToFloat(), m_z.ToFloat() ); } inline Float3 GetAsDegrees() const { return Float3( m_x.ToDegrees().ToFloat(), m_y.ToDegrees().ToFloat(), m_z.ToDegrees().ToFloat() ); } inline bool operator==( EulerAngles const& other ) const { return m_x == other.m_x && m_y == other.m_y && m_z == other.m_z; } inline bool operator!=( EulerAngles const& other ) const { return m_x != other.m_x || m_y != other.m_y || m_z != other.m_z; } inline Radians& operator[]( uint32_t i ) { return ( (Radians*) this )[i]; } inline Radians const& operator[]( uint32_t i ) const { return ( (Radians*) this )[i]; } // in degrees inline Float3 ToFloat3() const { return Float3( Math::RadiansToDegrees * m_x.ToFloat(), Math::RadiansToDegrees * m_y.ToFloat(), Math::RadiansToDegrees * m_z.ToFloat() ); } public: Radians m_x = 0.0f; Radians m_y = 0.0f; Radians m_z = 0.0f; }; struct AxisAngle { public: inline AxisAngle() = default; inline explicit AxisAngle( Float3 axis, Radians angle ) : m_axis( axis ), m_angle( angle ) {} inline explicit AxisAngle( Float3 axis, Degrees angle ) : m_axis( axis ), m_angle( angle.ToRadians() ) {} inline bool IsValid() const { float const lengthSq = m_axis.m_x * m_axis.m_x + m_axis.m_y * m_axis.m_y + m_axis.m_z * m_axis.m_z; return Math::Abs( lengthSq - 1.0f ) < Math::Epsilon; } public: Float3 m_axis = Float3::Zero; Radians m_angle = Radians( 0.0f ); }; ================================================ FILE: MotionCorrection/src/cpp/Math/Vector.cpp ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #include "Vector.h" #include "Quaternion.h" namespace Math { Vector const Vector::UnitX = { 1, 0, 0, 0 }; Vector const Vector::UnitY = { 0, 1, 0, 0 }; Vector const Vector::UnitZ = { 0, 0, 1, 0 }; Vector const Vector::UnitW = { 0, 0, 0, 1 }; Vector const Vector::Origin = { 0, 0, 0, 1 }; Vector const Vector::WorldForward = { 0, -1, 0, 0 }; Vector const Vector::WorldBackward = { 0, 1, 0, 0 }; Vector const Vector::WorldUp = { 0, 0, 1, 0 }; Vector const Vector::WorldDown = { 0, 0, -1, 0 }; Vector const Vector::WorldLeft = { 1, 0, 0, 0 }; Vector const Vector::WorldRight = { -1, 0, 0, 0 }; Vector const Vector::Infinity = _mm_castsi128_ps( _mm_set1_epi32( 0x7F800000 ) ); /* +inf in every lane (IEEE-754 bit pattern; the float constructor would store the finite value 2139095040.0f) */ Vector const Vector::QNaN = _mm_castsi128_ps( _mm_set1_epi32( 0x7FC00000 ) ); /* quiet NaN in every lane (IEEE-754 bit pattern) */ Vector const Vector::NegativeOne(-1.0f); Vector const Vector::Zero(0.0f); Vector const Vector::Half(0.5f); Vector const Vector::One(1.0f); Vector const Vector::Epsilon(Math::Epsilon); Vector const Vector::LargeEpsilon(Math::LargeEpsilon); Vector const Vector::OneMinusEpsilon(1.0f - Math::Epsilon); Vector const Vector::EpsilonMinusOne(Math::Epsilon - 1.0f); Vector const Vector::NormalizeCheckThreshold(0.01f); // Squared Error Vector const Vector::Pi(Math::Pi); Vector const Vector::PiDivTwo(Math::PiDivTwo); Vector const Vector::TwoPi(Math::TwoPi); Vector const Vector::OneDivTwoPi(Math::OneDivTwoPi); Vector const Vector::Select0000(0, 0, 0, 0); Vector const Vector::Select0001(0, 0, 0, 1); Vector const Vector::Select0010(0, 0, 1, 0); Vector const Vector::Select0011(0, 0, 1, 1); Vector const Vector::Select0100(0, 1, 0, 0); Vector const Vector::Select0101(0, 1, 0, 1); Vector const Vector::Select0110(0, 1, 1, 0); Vector const Vector::Select0111(0, 1, 1, 1); Vector const Vector::Select1000(1, 0, 0, 0); Vector const Vector::Select1001(1, 0, 0, 1); Vector
const Vector::Select1010(1, 0, 1, 0); Vector const Vector::Select1011(1, 0, 1, 1); Vector const Vector::Select1100(1, 1, 0, 0); Vector const Vector::Select1101(1, 1, 0, 1); Vector const Vector::Select1110(1, 1, 1, 0); Vector const Vector::Select1111(1, 1, 1, 1); Vector const Vector::BoxCorners[8] = { { -1.0f, -1.0f, 1.0f, 0.0f }, { 1.0f, -1.0f, 1.0f, 0.0f }, { 1.0f, 1.0f, 1.0f, 0.0f }, { -1.0f, 1.0f, 1.0f, 0.0f }, { -1.0f, -1.0f, -1.0f, 0.0f }, { 1.0f, -1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f, -1.0f, 0.0f }, { -1.0f, 1.0f, -1.0f, 0.0f }, }; Vector Vector::SLerp(const Vector& from, const Vector& to, float t) { ASSERT(t >= 0.0f && t <= 1.0f); if (from.LengthSquared3().IsLessThan4(Epsilon) || to.LengthSquared3().IsLessThan4(Epsilon)) { return Lerp(from, to, t); } // Calculate the final length const Vector fromLength = from.Length3(); const Vector toLength = to.Length3(); const Vector finalLength = Lerp(fromLength, toLength, t); // Normalize vectors const Vector normalizedFrom = from / fromLength; const Vector normalizedTo = to / toLength; // Handle parallel vector Vector result; if (normalizedFrom.IsParallelTo(normalizedTo)) { result = normalizedFrom; } else { // Interpolate the rotation between the vectors const Vector dot = Dot3(normalizedFrom, normalizedTo); const Vector angle = ACos(dot); const Vector axis = Cross3(normalizedFrom, normalizedTo).Normalize3(); const Vector interpolatedAngle = Lerp(Zero, angle, t); const Quaternion rotation(axis, Radians(interpolatedAngle.ToFloat())); const Vector finalDirection = rotation.RotateVector(normalizedFrom); result = finalDirection.GetNormalized3() * finalLength; } return result; } } ================================================ FILE: MotionCorrection/src/cpp/Math/Vector.h ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Compiler.h" #include "Types.h" #include "Constants.h" #include "SIMD.h" namespace Math { class alignas(16) Vector { public: static Vector const UnitX; static Vector const UnitY; static Vector const UnitZ; static Vector const UnitW; static Vector const Origin; static Vector const WorldForward; static Vector const WorldBackward; static Vector const WorldUp; static Vector const WorldDown; static Vector const WorldLeft; static Vector const WorldRight; static Vector const NegativeOne; static Vector const Zero; static Vector const Half; static Vector const One; static Vector const Epsilon; static Vector const LargeEpsilon; static Vector const OneMinusEpsilon; static Vector const EpsilonMinusOne; static Vector const NormalizeCheckThreshold; static Vector const Pi; static Vector const PiDivTwo; static Vector const TwoPi; static Vector const OneDivTwoPi; static Vector const Select0000; static Vector const Select0001; static Vector const Select0010; static Vector const Select0011; static Vector const Select0100; static Vector const Select0101; static Vector const Select0110; static Vector const Select0111; static Vector const Select1000; static Vector const Select1001; static Vector const Select1010; static Vector const Select1011; static Vector const Select1100; static Vector const Select1101; static Vector const Select1110; static Vector const Select1111; static Vector const Infinity; static Vector const QNaN; static Vector const BoxCorners[8]; // // Utils // static Vector Cross2(const Vector& v0, const Vector& v1); static Vector Cross3(const Vector& v0, const Vector& v1); static Vector Dot2(const Vector& v0, const Vector& v1); static Vector Dot3(const Vector& v0, const Vector& v1); static Vector Dot4(const Vector& v0, const Vector& v1); static Vector Average2(const Vector& v0, const Vector& v1); static Vector Average3(const Vector& v0, const Vector& v1); static Vector Average4(const Vector& v0, const Vector& v1); static Vector Min(const Vector& v0, const Vector& v1); static Vector Max(const Vector& v0, const Vector& v1); static float Min(const Vector& v); static float Max(const Vector& v); static Vector Clamp(const Vector& v, const Vector& min, const Vector& max); static Vector Xor(const Vector& vec0, const Vector& vec1); // Add the multiplied results to a vector: ( vec * mul ) + addend static Vector MultiplyAdd(const Vector& vec, const Vector& multiplier, const Vector& addend); // Subtract a vector from the multiplied result: (vec * mul ) - subtrahend static Vector MultiplySubtract(const Vector& vec, const Vector& multiplier, const Vector& subtrahend); // Subtract the multiplied result from a vector: minuend - (vec * mul ) static Vector NegativeMultiplySubtract(const Vector& vec, const Vector& multiplier, const Vector& minuend); // Sum up scaled versions of two vectors static Vector LinearCombination(const Vector& v0, const Vector& v1, float scale0, float scale1); // Linear interpolation of one vector to another static Vector Lerp(const Vector& from, const Vector& to, float t); // Normalized linear interpolation of one vector to another static Vector NLerp(const Vector& from, const Vector& to, float t); // Spherical interpolation of one vector to another static Vector SLerp(const Vector& from, const Vector& to, float t); // Combine the two vectors based on the control: 0 means select from v0, 1 means select from v1. E.G. 
To select XY from v0 and ZW from v1, control = Vector( 0, 0, 1, 1 ) static Vector Select(const Vector& v0, const Vector& v1, const Vector& control); // Get a permutation of two vectors, each template argument represents the element index to select ( v0: 0-3, v1: 4-7 ) template <uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW> static Vector Permute(const Vector& v0, const Vector& v1); // // Trigonometry // static Vector Sin(const Vector& vec); static Vector Cos(const Vector& vec); static Vector Tan(const Vector& vec); static Vector ASin(const Vector& vec); static Vector ACos(const Vector& vec); static Vector ATan(const Vector& vec); static Vector ATan2(const Vector& vec0, const Vector& vec1); static Vector SinEst(const Vector& vec); static Vector CosEst(const Vector& vec); static Vector TanEst(const Vector& vec); static Vector ASinEst(const Vector& vec); static Vector ACosEst(const Vector& vec); static Vector ATanEst(const Vector& vec); static Vector ATan2Est(const Vector& vec0, const Vector& vec1); static void SinCos(Vector& sin, Vector& cos, float angle); static void SinCos(Vector& sin, Vector& cos, const Vector& angle); static Vector AngleMod2Pi(const Vector& angles); public: operator __m128& (); operator const __m128& () const; Vector(); explicit Vector(Axis axis); explicit Vector(ZeroInit_t); explicit Vector(float v); Vector(__m128 v); Vector(float ix, float iy, float iz, float iw = 1.0f); Vector(const Float2& v, float iz = 0.0f, float iw = 0.0f); Vector(const Float3& v, float iw = 1.0f); Vector(const Float4& v); Vector(const float* pValues); bool IsValid() const; void Store(float* pValues) const; void StoreFloat(float& value) const; void StoreFloat2(Float2& value) const; void StoreFloat3(Float3& value) const; void StoreFloat4(Float4& value) const; float ToFloat() const; Float2 ToFloat2() const; Float3 ToFloat3() const; Float4 ToFloat4() const; operator Float2() const; operator Float3() const; operator Float4() const; // // Element accessors // float GetX() const; float GetY() const; float GetZ() const; float GetW() const; void SetX(float x); void SetY(float y); void SetZ(float z); void SetW(float w); float operator[](uint32_t i) const; // // W component operations // bool IsW1() const; bool IsW0() const; Vector& SetW0(); Vector& SetW1(); Vector GetWithW0() const; Vector GetWithW1() const; // // Dimensional Getters // // Returns only the first two components, z=w=0 Vector Get2D() const; // Returns only the first three components, w = 0 Vector Get3D() const; // // Algebraic operators // Vector operator+(const Vector& v) const; Vector& operator+=(const Vector& v); Vector operator-(const Vector& v) const; Vector& operator-=(const Vector& v); Vector operator*(const Vector& v) const; Vector& operator*=(const Vector& v); Vector operator/(const Vector& v) const; Vector& operator/=(const Vector& v); Vector operator*(float const f) const; Vector& operator*=(float const f); Vector operator/(float const f) const; Vector& operator/=(float const f); Vector operator-() const; Vector Orthogonal2D() const; Vector Cross2(const Vector& other) const; Vector Cross3(const Vector& other) const; Vector Dot2(const Vector& other) const; Vector Dot3(const Vector& other) const; Vector Dot4(const Vector& other) const; float GetDot2(const Vector& other) const; float GetDot3(const Vector& other) const; float GetDot4(const Vector& other) const; Vector ScalarProjection(const Vector& other) const; float GetScalarProjection(const Vector& other) const; Vector VectorProjection(const Vector& other) const; // // Transformations // Vector& Invert(); Vector GetInverse()
const; Vector GetReciprocal() const; Vector& InvertEst(); Vector GetInverseEst() const; Vector& Negate(); Vector GetNegated() const; Vector& Abs(); Vector GetAbs() const; Vector& Sqrt(); Vector GetSqrt(); Vector& ReciprocalSqrt(); Vector GetReciprocalSqrt(); Vector& EstimatedReciprocalSqrt(); Vector GetEstimatedReciprocalSqrt(); Vector& Normalize2(); Vector& Normalize3(); Vector& Normalize4(); Vector GetNormalized2() const; Vector GetNormalized3() const; Vector GetNormalized4() const; Vector& Floor(); Vector GetFloor() const; Vector& Ceil(); Vector GetCeil() const; Vector& Round(); Vector GetRound() const; Vector GetSign() const; // // Permutations // Vector GetSplatX() const; Vector GetSplatY() const; Vector GetSplatZ() const; Vector GetSplatW() const; // Get a shuffled version of this vector, each template argument represents the element index in the original vector template <uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW> Vector Swizzle() const; // Get a shuffled version of this vector, each argument represents the element index in the original vector Vector Swizzle(uint32_t xIdx, uint32_t yIdx, uint32_t zIdx, uint32_t wIdx) const; // Get a shuffled version of this vector, each argument represents the element index in the original vector Vector Shuffle(uint32_t xIdx, uint32_t yIdx, uint32_t zIdx, uint32_t wIdx) const; // Get a shuffled version of this vector, each template argument represents the element index in the original vector template <uint32_t ShuffleX, uint32_t ShuffleY, uint32_t ShuffleZ, uint32_t ShuffleW> Vector Shuffle() const; // // Queries // Vector Length2() const; Vector Length3() const; Vector Length4() const; float GetLength2() const; float GetLength3() const; float GetLength4() const; Vector InverseLength2() const; Vector InverseLength3() const; Vector InverseLength4() const; float GetInverseLength2() const; float GetInverseLength3() const; float GetInverseLength4() const; Vector LengthSquared2() const; Vector LengthSquared3() const; Vector LengthSquared4() const; float GetLengthSquared2() const; float GetLengthSquared3() const; float GetLengthSquared4() const; Vector Distance2(const Vector& to) const; Vector Distance3(const Vector& to) const; Vector Distance4(const Vector& to) const; float GetDistance2(const Vector& to) const; float GetDistance3(const Vector& to) const; float GetDistance4(const Vector& to) const; Vector DistanceSquared2(const Vector& to) const; Vector DistanceSquared3(const Vector& to) const; Vector DistanceSquared4(const Vector& to) const; float GetDistanceSquared2(const Vector& to) const; float GetDistanceSquared3(const Vector& to) const; float GetDistanceSquared4(const Vector& to) const; bool IsNormalized2() const; bool IsNormalized3() const; bool IsNormalized4() const; // Is this vector within the range [-bounds, bounds] Vector InBounds(const Vector& bounds) const; bool IsInBounds2(const Vector& bounds) const; bool IsInBounds3(const Vector& bounds) const; bool IsInBounds4(const Vector& bounds) const; Vector Equal(const Vector& v) const; bool IsEqual2(const Vector& v) const; bool IsEqual3(const Vector& v) const; bool IsEqual4(const Vector& v) const; Vector NearEqual(const Vector& v, const Vector& epsilon) const; bool IsNearEqual2(const Vector& v, float epsilon) const; bool IsNearEqual3(const Vector& v, float epsilon) const; bool IsNearEqual4(const Vector& v, float epsilon) const; bool IsNearEqual2(const Vector& v, const Vector& epsilon = Vector::Epsilon) const; bool IsNearEqual3(const Vector& v, const Vector& epsilon = Vector::Epsilon) const; bool IsNearEqual4(const Vector& v, const Vector& epsilon = Vector::Epsilon) const; Vector GreaterThan(const Vector& v) const; bool
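// Each comparison query comes in two flavors: the Vector-returning form yields
// a per-lane comparison mask, while the bool-returning Is* forms reduce that
// mask to a single result over the first 2, 3, or 4 lanes.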
IsAnyGreaterThan(const Vector& v) const; bool IsGreaterThan2(const Vector& v) const; bool IsGreaterThan3(const Vector& v) const; bool IsGreaterThan4(const Vector& v) const; Vector GreaterThanEqual(const Vector& v) const; bool IsAnyGreaterThanEqual(const Vector& v) const; bool IsGreaterThanEqual2(const Vector& v) const; bool IsGreaterThanEqual3(const Vector& v) const; bool IsGreaterThanEqual4(const Vector& v) const; Vector LessThan(const Vector& v) const; bool IsAnyLessThan(const Vector& v) const; bool IsLessThan2(const Vector& v) const; bool IsLessThan3(const Vector& v) const; bool IsLessThan4(const Vector& v) const; Vector LessThanEqual(const Vector& v) const; bool IsAnyLessThanEqual(const Vector& v) const; bool IsLessThanEqual2(const Vector& v) const; bool IsLessThanEqual3(const Vector& v) const; bool IsLessThanEqual4(const Vector& v) const; Vector EqualsZero() const; bool IsAnyEqualToZero2() const; bool IsAnyEqualToZero3() const; bool IsAnyEqualToZero4() const; bool IsZero2() const; bool IsZero3() const; bool IsZero4() const; Vector NearEqualsZero(float epsilon = Math::Epsilon) const; bool IsNearZero2(float epsilon = Math::Epsilon) const; bool IsNearZero3(float epsilon = Math::Epsilon) const; bool IsNearZero4(float epsilon = Math::Epsilon) const; Vector EqualsInfinity() const; bool IsInfinite2() const; bool IsInfinite3() const; bool IsInfinite4() const; Vector EqualsNaN() const; bool IsNaN2() const; bool IsNaN3() const; bool IsNaN4() const; bool IsParallelTo(const Vector& v) const; void ToDirectionAndLength2(Vector& direction, float& length) const; void ToDirectionAndLength3(Vector& direction, float& length) const; bool operator==(const Vector& rhs) const; bool operator!=(const Vector& rhs) const; public: __m128 m_data; }; static_assert(sizeof(Vector) == 16, "Vector size must be 16 bytes!"); } #include "Vector.inl" ================================================ FILE: MotionCorrection/src/cpp/Math/Vector.inl ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ #pragma once #include <cstring> /* memset */ #include "Vector.h" namespace Math { FORCE_INLINE Vector Vector::Cross2(const Vector& v0, const Vector& v1) { return v0.Cross2(v1); } FORCE_INLINE Vector Vector::Cross3(const Vector& v0, const Vector& v1) { return v0.Cross3(v1); } FORCE_INLINE Vector Vector::Dot2(const Vector& v0, const Vector& v1) { return v0.Dot2(v1); } FORCE_INLINE Vector Vector::Dot3(const Vector& v0, const Vector& v1) { return v0.Dot3(v1); } FORCE_INLINE Vector Vector::Dot4(const Vector& v0, const Vector& v1) { return v0.Dot4(v1); } FORCE_INLINE Vector Vector::Average2(const Vector& v0, const Vector& v1) { auto avg4 = Average4(v0, v1); return Vector::Select(avg4, Vector::Zero, Vector(0, 0, 1, 1)); } FORCE_INLINE Vector Vector::Average3(const Vector& v0, const Vector& v1) { auto avg4 = Average4(v0, v1); return Vector::Select(avg4, Vector::Zero, Vector(0, 0, 0, 1)); } FORCE_INLINE Vector Vector::Average4(const Vector& v0, const Vector& v1) { return (v0 + v1) * Vector::Half; } FORCE_INLINE Vector Vector::Min(const Vector& v0, const Vector& v1) { Vector result; result = _mm_min_ps(v0, v1); return result; } FORCE_INLINE Vector Vector::Max(const Vector& v0, const Vector& v1) { Vector result; result = _mm_max_ps(v0, v1); return result; } FORCE_INLINE float Vector::Min(const Vector& v) { __m128 shufReg, sumsReg; shufReg = _mm_movehdup_ps(v); sumsReg = _mm_min_ps(v, shufReg); shufReg = _mm_movehl_ps(shufReg, sumsReg); sumsReg = _mm_min_ss(sumsReg, shufReg); return _mm_cvtss_f32(sumsReg); } FORCE_INLINE float Vector::Max(const Vector& v) { __m128 shufReg, sumsReg; shufReg = _mm_movehdup_ps(v); sumsReg = _mm_max_ps(v, shufReg); shufReg = _mm_movehl_ps(shufReg, sumsReg); sumsReg = _mm_max_ss(sumsReg, shufReg); return _mm_cvtss_f32(sumsReg); } FORCE_INLINE Vector Vector::Clamp(const Vector& v, const Vector& min, const Vector& max) { Vector result; result = _mm_max_ps(min, v); result = _mm_min_ps(result, max); return result; } FORCE_INLINE Vector Vector::Xor(const Vector& v0, const Vector& v1) { __m128i V = _mm_xor_si128(_mm_castps_si128(v0), _mm_castps_si128(v1)); Vector result; result = _mm_castsi128_ps(V); return result; } FORCE_INLINE Vector Vector::MultiplyAdd(const Vector& v, const Vector& multiplier, const Vector& addend) { // result = addend + ( vec * multiplier ) Vector result; result = _mm_mul_ps(v, multiplier); result = _mm_add_ps(result, addend); return result; } FORCE_INLINE Vector Vector::MultiplySubtract(const Vector& vec, const Vector& multiplier, const Vector& subtrahend) { // result = ( vec * multiplier ) - subtrahend auto r = _mm_mul_ps(vec, multiplier); return _mm_sub_ps(r, subtrahend); } FORCE_INLINE Vector Vector::NegativeMultiplySubtract(const Vector& vec, const Vector& multiplier, const Vector& minuend) { // result = minuend - ( vec * multiplier ) auto r = _mm_mul_ps(vec, multiplier); return _mm_sub_ps(minuend, r); } FORCE_INLINE Vector Vector::LinearCombination(const Vector& v0, const Vector& v1, float scale0, float scale1) { return (v0 * scale0) + (v1 * scale1); } FORCE_INLINE Vector Vector::Lerp(const Vector& from, const Vector& to, float t) { ASSERT(t >= 0.0f && t <= 1.0f); Vector L = _mm_sub_ps(to, from); Vector S = _mm_set_ps1(t); Vector result; result = _mm_mul_ps(L, S); result = _mm_add_ps(result, from); return result; } FORCE_INLINE Vector Vector::NLerp(const Vector& from, const Vector& to, float t) { ASSERT(t >= 0.0f && t <= 1.0f); // Calculate the final length auto const fromLength = from.Length3(); auto const toLength = to.Length3();
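// NLerp interpolates direction and magnitude separately: the unit directions
// are lerped and renormalized below, then scaled by the linearly interpolated
// length, so the result keeps a sensible magnitude even when |from| != |to|.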
auto const finalLength = Vector::Lerp(fromLength, toLength, t); // Normalize vectors Vector const normalizedFrom = from / fromLength; Vector const normalizedTo = to / toLength; // LERP auto const finalDirection = Lerp(normalizedFrom, normalizedTo, t); auto result = finalDirection.GetNormalized3() * finalLength; return result; } FORCE_INLINE Vector Vector::Select(const Vector& v0, const Vector& v1, const Vector& control) { auto const ctrl = _mm_cmpneq_ps(control, Vector::Zero); Vector result; auto vTemp1 = _mm_andnot_ps(ctrl, v0); auto vTemp2 = _mm_and_ps(v1, ctrl); result = _mm_or_ps(vTemp1, vTemp2); return result; } template <uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW> FORCE_INLINE Vector Vector::Permute(const Vector& v0, const Vector& v1) { static_assert(PermuteX <= 7, "Element index parameter out of range"); static_assert(PermuteY <= 7, "Element index parameter out of range"); static_assert(PermuteZ <= 7, "Element index parameter out of range"); static_assert(PermuteW <= 7, "Element index parameter out of range"); uint32_t const shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3); bool const whichX = PermuteX > 3; bool const whichY = PermuteY > 3; bool const whichZ = PermuteZ > 3; bool const whichW = PermuteW > 3; static SIMD::UIntMask const selectMask = { whichX ? 0xFFFFFFFF : 0, whichY ? 0xFFFFFFFF : 0, whichZ ? 0xFFFFFFFF : 0, whichW ? 0xFFFFFFFF : 0 }; __m128 shuffled1 = _mm_shuffle_ps(v0, v0, shuffle); __m128 shuffled2 = _mm_shuffle_ps(v1, v1, shuffle); __m128 masked1 = _mm_andnot_ps(selectMask, shuffled1); __m128 masked2 = _mm_and_ps(selectMask, shuffled2); return _mm_or_ps(masked1, masked2); } FORCE_INLINE Vector Vector::Sin(const Vector& vec) { // Force the value within the bounds of pi auto m_x = Vector::AngleMod2Pi(vec); // Map in [-pi/2,pi/2] with sin(m_y) = sin(m_x). __m128 sign = _mm_and_ps(m_x, SIMD::g_signMask); __m128 c = _mm_or_ps(Vector::Pi, sign); // pi when m_x >= 0, -pi when m_x < 0 __m128 absx = _mm_andnot_ps(sign, m_x); // |m_x| __m128 rflx = _mm_sub_ps(c, m_x); __m128 comp = _mm_cmple_ps(absx, Vector::PiDivTwo); __m128 select0 = _mm_and_ps(comp, m_x); __m128 select1 = _mm_andnot_ps(comp, rflx); m_x = _mm_or_ps(select0, select1); __m128 x2 = _mm_mul_ps(m_x, m_x); // Compute polynomial approximation const auto SC1 = SIMD::g_sinCoefficients1; auto vConstants = _mm_shuffle_ps(SC1, SC1, _MM_SHUFFLE(0, 0, 0, 0)); __m128 Result = _mm_mul_ps(vConstants, x2); const auto SC0 = SIMD::g_sinCoefficients0; vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(3, 3, 3, 3)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(0, 0, 0, 0)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); Result = _mm_add_ps(Result, Vector::One); Result = _mm_mul_ps(Result, m_x); return Result; } FORCE_INLINE Vector Vector::Cos(const Vector& vec) { // Map V to m_x in [-pi,pi]. auto m_x = Vector::AngleMod2Pi(vec); // Map in [-pi/2,pi/2] with cos(m_y) = sign*cos(m_x).
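// Reflection step: when |m_x| > pi/2 the reduced argument rflx = c - m_x lies
// back in [-pi/2, pi/2] and cos(m_x) = -cos(rflx), so the polynomial is
// evaluated on the reduced argument and the result is negated through 'sign'.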
auto sign = _mm_and_ps(m_x, SIMD::g_signMask); __m128 c = _mm_or_ps(Vector::Pi, sign); // pi when m_x >= 0, -pi when m_x < 0 __m128 absx = _mm_andnot_ps(sign, m_x); // |m_x| __m128 rflx = _mm_sub_ps(c, m_x); __m128 comp = _mm_cmple_ps(absx, Vector::PiDivTwo); __m128 select0 = _mm_and_ps(comp, m_x); __m128 select1 = _mm_andnot_ps(comp, rflx); m_x = _mm_or_ps(select0, select1); select0 = _mm_and_ps(comp, Vector::One); select1 = _mm_andnot_ps(comp, Vector::NegativeOne); sign = _mm_or_ps(select0, select1); __m128 x2 = _mm_mul_ps(m_x, m_x); // Compute polynomial approximation const auto CC1 = SIMD::g_cosCoefficients1; auto vConstants = _mm_shuffle_ps(CC1, CC1, _MM_SHUFFLE(0, 0, 0, 0)); __m128 Result = _mm_mul_ps(vConstants, x2); const auto CC0 = SIMD::g_cosCoefficients0; vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(3, 3, 3, 3)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(0, 0, 0, 0)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); Result = _mm_add_ps(Result, Vector::One); Result = _mm_mul_ps(Result, sign); return Result; } FORCE_INLINE Vector Vector::Tan(const Vector& vec) { static const Vector tanCoefficients0 = { 1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f }; static const Vector tanCoefficients1 = { 4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f }; static const Vector tanConstants = { 1.570796371f, 6.077100628e-11f, 0.000244140625f, 0.63661977228f /*2 / Pi*/ }; static const SIMD::UIntMask mask = { 0x1, 0x1, 0x1, 0x1 }; Vector TwoDivPi = tanConstants.GetSplatW(); Vector C0 = tanConstants.GetSplatX(); Vector C1 = tanConstants.GetSplatY(); Vector vEpsilon = tanConstants.GetSplatZ(); Vector VA = (vec * TwoDivPi).Round(); Vector VC = Vector::NegativeMultiplySubtract(VA, C0, vec); Vector VB = VA.GetAbs(); VC = Vector::NegativeMultiplySubtract(VA, C1, VC); reinterpret_cast<__m128i*>(&VB)[0] = _mm_cvttps_epi32(VB); Vector VC2 = VC * VC; Vector T7 = tanCoefficients1.GetSplatW(); Vector T6 = tanCoefficients1.GetSplatZ(); Vector T4 = tanCoefficients1.GetSplatX(); Vector T3 = tanCoefficients0.GetSplatW(); Vector T5 = tanCoefficients1.GetSplatY(); Vector T2 = tanCoefficients0.GetSplatZ(); Vector T1 = tanCoefficients0.GetSplatY(); Vector T0 = tanCoefficients0.GetSplatX(); Vector VBIsEven = _mm_and_ps(VB, mask); VBIsEven = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(VBIsEven), _mm_castps_si128(Vector::Zero))); Vector N = Vector::MultiplyAdd(VC2, T7, T6); Vector D = Vector::MultiplyAdd(VC2, T4, T3); N = Vector::MultiplyAdd(VC2, N, T5); D = Vector::MultiplyAdd(VC2, D, T2); N = VC2 * N; D = Vector::MultiplyAdd(VC2, D, T1); N = Vector::MultiplyAdd(VC, N, VC); Vector VCNearZero = VC.InBounds(vEpsilon); D = Vector::MultiplyAdd(VC2, D, T0); N = Vector::Select(N, VC, VCNearZero); D = Vector::Select(D, Vector::One, VCNearZero); Vector R0 = N.GetNegated(); Vector R1 = N / D; R0 = D / R0; Vector VIsZero = vec.EqualsZero(); Vector Result = Vector::Select(R0, R1, VBIsEven); Result = Vector::Select(Result, Zero, VIsZero); return Result; } FORCE_INLINE Vector Vector::ASin(const Vector& vec) { __m128 nonnegative = _mm_cmpge_ps(vec, Vector::Zero); __m128 mvalue = _mm_sub_ps(Vector::Zero, vec); __m128 
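// ASin is computed through the arc-cosine fit: t0 below ends up approximating
// acos(vec) (sign handled by the 'nonnegative' mask), and the final
// pi/2 - t0 applies the identity asin(x) = pi/2 - acos(x).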
m_x = _mm_max_ps(vec, mvalue); // |vec| // Compute (1-|vec|), clamp to zero to avoid sqrt of negative number. __m128 oneMValue = _mm_sub_ps(Vector::One, m_x); __m128 clampOneMValue = _mm_max_ps(Vector::Zero, oneMValue); __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|vec|) // Compute polynomial approximation const auto AC1 = SIMD::g_arcCoefficients1; auto vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(3, 3, 3, 3)); __m128 t0 = _mm_mul_ps(vConstants, m_x); vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(2, 2, 2, 2)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(1, 1, 1, 1)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(0, 0, 0, 0)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); const auto AC0 = SIMD::g_arcCoefficients0; vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(3, 3, 3, 3)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(2, 2, 2, 2)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(1, 1, 1, 1)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(0, 0, 0, 0)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, root); __m128 t1 = _mm_sub_ps(Vector::Pi, t0); t0 = _mm_and_ps(nonnegative, t0); t1 = _mm_andnot_ps(nonnegative, t1); t0 = _mm_or_ps(t0, t1); t0 = _mm_sub_ps(Vector::PiDivTwo, t0); return t0; } FORCE_INLINE Vector Vector::ACos(const Vector& vec) { __m128 nonnegative = _mm_cmpge_ps(vec, Vector::Zero); __m128 mvalue = _mm_sub_ps(Vector::Zero, vec); __m128 m_x = _mm_max_ps(vec, mvalue); // |vec| // Compute (1-|vec|), clamp to zero to avoid sqrt of negative number. 
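// acos(|x|) is approximated as sqrt(1 - |x|) * P(|x|) with the eight
// coefficients in g_arcCoefficients0/1; negative inputs are recovered from
// acos(-x) = pi - acos(x) via the 'nonnegative' mask at the end.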
__m128 oneMValue = _mm_sub_ps(Vector::One, m_x); __m128 clampOneMValue = _mm_max_ps(Vector::Zero, oneMValue); __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|vec|) // Compute polynomial approximation const auto AC1 = SIMD::g_arcCoefficients1; auto vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(3, 3, 3, 3)); __m128 t0 = _mm_mul_ps(vConstants, m_x); vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(2, 2, 2, 2)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(1, 1, 1, 1)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(0, 0, 0, 0)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); const auto AC0 = SIMD::g_arcCoefficients0; vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(3, 3, 3, 3)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(2, 2, 2, 2)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(1, 1, 1, 1)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(0, 0, 0, 0)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, root); __m128 t1 = _mm_sub_ps(Vector::Pi, t0); t0 = _mm_and_ps(nonnegative, t0); t1 = _mm_andnot_ps(nonnegative, t1); t0 = _mm_or_ps(t0, t1); return t0; } FORCE_INLINE Vector Vector::ATan(const Vector& vec) { __m128 absV = vec.GetAbs(); __m128 invV = _mm_div_ps(Vector::One, vec); __m128 comp = _mm_cmpgt_ps(vec, Vector::One); __m128 select0 = _mm_and_ps(comp, Vector::One); __m128 select1 = _mm_andnot_ps(comp, Vector::NegativeOne); __m128 sign = _mm_or_ps(select0, select1); comp = _mm_cmple_ps(absV, Vector::One); select0 = _mm_and_ps(comp, Vector::Zero); select1 = _mm_andnot_ps(comp, sign); sign = _mm_or_ps(select0, select1); select0 = _mm_and_ps(comp, vec); select1 = _mm_andnot_ps(comp, invV); __m128 m_x = _mm_or_ps(select0, select1); __m128 x2 = _mm_mul_ps(m_x, m_x); // Compute polynomial approximation Vector const TC1 = SIMD::g_aTanCoefficients1; Vector vConstants = _mm_shuffle_ps(TC1, TC1, _MM_SHUFFLE(3, 3, 3, 3)); __m128 Result = _mm_mul_ps(vConstants, x2); vConstants = _mm_shuffle_ps(TC1, TC1, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(TC1, TC1, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(TC1, TC1, _MM_SHUFFLE(0, 0, 0, 0)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); Vector const TC0 = SIMD::g_aTanCoefficients0; vConstants = _mm_shuffle_ps(TC0, TC0, _MM_SHUFFLE(3, 3, 3, 3)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(TC0, TC0, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(TC0, TC0, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(TC0, TC0, _MM_SHUFFLE(0, 0, 0, 0)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); Result = _mm_add_ps(Result, Vector::One); Result = _mm_mul_ps(Result, m_x); __m128 result1 = _mm_mul_ps(sign, Vector::PiDivTwo); result1 = _mm_sub_ps(result1, Result); comp = _mm_cmpeq_ps(sign, Vector::Zero); select0 = _mm_and_ps(comp, Result); select1 = _mm_andnot_ps(comp, result1); Result = 
_mm_or_ps(select0, select1); return Result; } FORCE_INLINE Vector Vector::ATan2(const Vector& Y, const Vector& X) { Vector ATanResultValid = Vector(SIMD::g_trueMask); Vector vPi = Vector(SIMD::g_aTan2Constants).GetSplatX(); Vector vPiOverTwo = Vector(SIMD::g_aTan2Constants).GetSplatY(); Vector vPiOverFour = Vector(SIMD::g_aTan2Constants).GetSplatZ(); Vector vThreePiOverFour = Vector(SIMD::g_aTan2Constants).GetSplatW(); Vector YEqualsZero = Y.EqualsZero(); Vector XEqualsZero = X.EqualsZero(); Vector XIsPositive = _mm_and_ps(X, SIMD::g_signMask); XIsPositive = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XIsPositive), _mm_castps_si128(Vector::Zero))); Vector YEqualsInfinity = Y.EqualsInfinity(); Vector XEqualsInfinity = X.EqualsInfinity(); Vector YSign = _mm_and_ps(Y, SIMD::g_signMask); vPi = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vPi), _mm_castps_si128(YSign))); vPiOverTwo = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vPiOverTwo), _mm_castps_si128(YSign))); vPiOverFour = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vPiOverFour), _mm_castps_si128(YSign))); vThreePiOverFour = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vThreePiOverFour), _mm_castps_si128(YSign))); Vector R1 = Vector::Select(vPi, YSign, XIsPositive); Vector R2 = Vector::Select(ATanResultValid, vPiOverTwo, XEqualsZero); Vector R3 = Vector::Select(R2, R1, YEqualsZero); Vector R4 = Vector::Select(vThreePiOverFour, vPiOverFour, XIsPositive); Vector R5 = Vector::Select(vPiOverTwo, R4, XEqualsInfinity); Vector Result = Vector::Select(R3, R5, YEqualsInfinity); ATanResultValid = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(Result), _mm_castps_si128(ATanResultValid))); Vector V = Y / X; Vector R0 = Vector::ATan(V); R1 = Vector::Select(vPi, Vector(SIMD::g_signMask), XIsPositive); R2 = R0 + R1; return Vector::Select(Result, R2, ATanResultValid); } FORCE_INLINE Vector Vector::SinEst(const Vector& vec) { // Force the value within the bounds of pi auto m_x = Vector::AngleMod2Pi(vec); // Map in [-pi/2,pi/2] with sin(m_y) = sin(m_x). __m128 sign = _mm_and_ps(m_x, SIMD::g_signMask); __m128 c = _mm_or_ps(Vector::Pi, sign); // pi when m_x >= 0, -pi when m_x < 0 __m128 absx = _mm_andnot_ps(sign, m_x); // |m_x| __m128 rflx = _mm_sub_ps(c, m_x); __m128 comp = _mm_cmple_ps(absx, Vector::PiDivTwo); __m128 select0 = _mm_and_ps(comp, m_x); __m128 select1 = _mm_andnot_ps(comp, rflx); m_x = _mm_or_ps(select0, select1); __m128 x2 = _mm_mul_ps(m_x, m_x); // Compute polynomial approximation const auto SEC = SIMD::g_sinCoefficients1; auto vConstants = _mm_shuffle_ps(SEC, SEC, _MM_SHUFFLE(3, 3, 3, 3)); __m128 Result = _mm_mul_ps(vConstants, x2); vConstants = _mm_shuffle_ps(SEC, SEC, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(SEC, SEC, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); Result = _mm_add_ps(Result, Vector::One); Result = _mm_mul_ps(Result, m_x); return Result; } FORCE_INLINE Vector Vector::CosEst(const Vector& vec) { // Map V to m_x in [-pi,pi]. auto m_x = Vector::AngleMod2Pi(vec); // Map in [-pi/2,pi/2] with cos(m_y) = sign*cos(m_x). 
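// Est variant: the same range reduction as Cos, but the polynomial keeps only
// three coefficients from g_cosCoefficients1 instead of the five terms used by
// Cos, trading accuracy for speed.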
auto sign = _mm_and_ps(m_x, SIMD::g_signMask); __m128 c = _mm_or_ps(Vector::Pi, sign); // pi when m_x >= 0, -pi when m_x < 0 __m128 absx = _mm_andnot_ps(sign, m_x); // |m_x| __m128 rflx = _mm_sub_ps(c, m_x); __m128 comp = _mm_cmple_ps(absx, Vector::PiDivTwo); __m128 select0 = _mm_and_ps(comp, m_x); __m128 select1 = _mm_andnot_ps(comp, rflx); m_x = _mm_or_ps(select0, select1); select0 = _mm_and_ps(comp, Vector::One); select1 = _mm_andnot_ps(comp, Vector::NegativeOne); sign = _mm_or_ps(select0, select1); __m128 x2 = _mm_mul_ps(m_x, m_x); // Compute polynomial approximation const auto CEC = SIMD::g_cosCoefficients1; auto vConstants = _mm_shuffle_ps(CEC, CEC, _MM_SHUFFLE(3, 3, 3, 3)); __m128 Result = _mm_mul_ps(vConstants, x2); vConstants = _mm_shuffle_ps(CEC, CEC, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(CEC, CEC, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); Result = _mm_add_ps(Result, Vector::One); Result = _mm_mul_ps(Result, sign); return Result; } FORCE_INLINE Vector Vector::TanEst(const Vector& vec) { Vector W = Vector(SIMD::g_tanEstCoefficients).GetSplatW(); Vector V1 = (vec * W).Round(); V1 = Vector::NegativeMultiplySubtract(Vector::Pi, V1, vec); Vector const T0 = Vector(SIMD::g_tanEstCoefficients).GetSplatX(); Vector const T1 = Vector(SIMD::g_tanEstCoefficients).GetSplatY(); Vector const T2 = Vector(SIMD::g_tanEstCoefficients).GetSplatZ(); auto V2T2 = Vector::NegativeMultiplySubtract(V1, V1, T2); auto V2 = V1 * V1; auto V1T0 = V1 * T0; auto V1T1 = V1 * T1; auto N = Vector::MultiplyAdd(V2, V1T1, V1T0); auto D = V2T2.GetInverseEst(); return N * D; } FORCE_INLINE Vector Vector::ASinEst(const Vector& vec) { __m128 nonnegative = _mm_cmpge_ps(vec, Vector::Zero); __m128 mvalue = _mm_sub_ps(Vector::Zero, vec); __m128 m_x = _mm_max_ps(vec, mvalue); // |vec| // Compute (1-|vec|), clamp to zero to avoid sqrt of negative number. __m128 oneMValue = _mm_sub_ps(Vector::One, m_x); __m128 clampOneMValue = _mm_max_ps(Vector::Zero, oneMValue); __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|vec|) // Compute polynomial approximation const auto AEC = SIMD::g_arcEstCoefficients; auto vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(3, 3, 3, 3)); __m128 t0 = _mm_mul_ps(vConstants, m_x); vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(2, 2, 2, 2)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(1, 1, 1, 1)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(0, 0, 0, 0)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, root); __m128 t1 = _mm_sub_ps(Vector::Pi, t0); t0 = _mm_and_ps(nonnegative, t0); t1 = _mm_andnot_ps(nonnegative, t1); t0 = _mm_or_ps(t0, t1); t0 = _mm_sub_ps(Vector::PiDivTwo, t0); return t0; } FORCE_INLINE Vector Vector::ACosEst(const Vector& vec) { __m128 nonnegative = _mm_cmpge_ps(vec, Vector::Zero); __m128 mvalue = _mm_sub_ps(Vector::Zero, vec); __m128 m_x = _mm_max_ps(vec, mvalue); // |vec| // Compute (1-|vec|), clamp to zero to avoid sqrt of negative number. 
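// Same reflection scheme as ACos (acos(-x) = pi - acos(x)), but with the
// shorter four-coefficient g_arcEstCoefficients fit in place of the full
// eight-coefficient polynomial.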
__m128 oneMValue = _mm_sub_ps(Vector::One, m_x); __m128 clampOneMValue = _mm_max_ps(Vector::Zero, oneMValue); __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|vec|) // Compute polynomial approximation auto vConstants = _mm_shuffle_ps(SIMD::g_arcEstCoefficients, SIMD::g_arcEstCoefficients, _MM_SHUFFLE(3, 3, 3, 3)); __m128 t0 = _mm_mul_ps(vConstants, m_x); vConstants = _mm_shuffle_ps(SIMD::g_arcEstCoefficients, SIMD::g_arcEstCoefficients, _MM_SHUFFLE(2, 2, 2, 2)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(SIMD::g_arcEstCoefficients, SIMD::g_arcEstCoefficients, _MM_SHUFFLE(1, 1, 1, 1)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(SIMD::g_arcEstCoefficients, SIMD::g_arcEstCoefficients, _MM_SHUFFLE(0, 0, 0, 0)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, root); __m128 t1 = _mm_sub_ps(Vector::Pi, t0); t0 = _mm_and_ps(nonnegative, t0); t1 = _mm_andnot_ps(nonnegative, t1); t0 = _mm_or_ps(t0, t1); return t0; } FORCE_INLINE Vector Vector::ATanEst(const Vector& vec) { __m128 absV = vec.GetAbs(); __m128 invV = _mm_div_ps(Vector::One, vec); __m128 comp = _mm_cmpgt_ps(vec, Vector::One); __m128 select0 = _mm_and_ps(comp, Vector::One); __m128 select1 = _mm_andnot_ps(comp, Vector::NegativeOne); __m128 sign = _mm_or_ps(select0, select1); comp = _mm_cmple_ps(absV, Vector::One); select0 = _mm_and_ps(comp, Vector::Zero); select1 = _mm_andnot_ps(comp, sign); sign = _mm_or_ps(select0, select1); select0 = _mm_and_ps(comp, vec); select1 = _mm_andnot_ps(comp, invV); __m128 m_x = _mm_or_ps(select0, select1); __m128 x2 = _mm_mul_ps(m_x, m_x); // Compute polynomial approximation Vector const AEC = SIMD::g_aTanEstCoefficients1; Vector vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(3, 3, 3, 3)); __m128 Result = _mm_mul_ps(vConstants, x2); vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(0, 0, 0, 0)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); // ATanEstCoefficients0 is already splatted Result = _mm_add_ps(Result, SIMD::g_aTanEstCoefficients0); Result = _mm_mul_ps(Result, m_x); __m128 result1 = _mm_mul_ps(sign, Vector::PiDivTwo); result1 = _mm_sub_ps(result1, Result); comp = _mm_cmpeq_ps(sign, Vector::Zero); select0 = _mm_and_ps(comp, Result); select1 = _mm_andnot_ps(comp, result1); Result = _mm_or_ps(select0, select1); return Result; } FORCE_INLINE Vector Vector::ATan2Est(const Vector& Y, const Vector& X) { Vector ATanResultValid = Vector(SIMD::g_trueMask); Vector vPi = Vector(SIMD::g_aTan2Constants).GetSplatX(); Vector vPiOverTwo = Vector(SIMD::g_aTan2Constants).GetSplatY(); Vector vPiOverFour = Vector(SIMD::g_aTan2Constants).GetSplatZ(); Vector vThreePiOverFour = Vector(SIMD::g_aTan2Constants).GetSplatW(); Vector YEqualsZero = Y.EqualsZero(); Vector XEqualsZero = X.EqualsZero(); Vector XIsPositive = _mm_and_ps(X, SIMD::g_signMask); XIsPositive = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XIsPositive), _mm_castps_si128(Vector::Zero))); Vector YEqualsInfinity = Y.EqualsInfinity(); Vector XEqualsInfinity = X.EqualsInfinity(); Vector YSign = _mm_and_ps(Y, SIMD::g_signMask); vPi = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vPi), _mm_castps_si128(YSign))); vPiOverTwo =
_mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vPiOverTwo), _mm_castps_si128(YSign)));
    vPiOverFour = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vPiOverFour), _mm_castps_si128(YSign)));
    vThreePiOverFour = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vThreePiOverFour), _mm_castps_si128(YSign)));

    Vector R1 = Vector::Select(vPi, YSign, XIsPositive);
    Vector R2 = Vector::Select(ATanResultValid, vPiOverTwo, XEqualsZero);
    Vector R3 = Vector::Select(R2, R1, YEqualsZero);
    Vector R4 = Vector::Select(vThreePiOverFour, vPiOverFour, XIsPositive);
    Vector R5 = Vector::Select(vPiOverTwo, R4, XEqualsInfinity);
    Vector Result = Vector::Select(R3, R5, YEqualsInfinity);
    ATanResultValid = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(Result), _mm_castps_si128(ATanResultValid)));

    Vector Reciprocal = X.GetInverseEst();
    Vector V = Y * Reciprocal;
    Vector R0 = Vector::ATanEst(V);
    R1 = Vector::Select(vPi, Vector(SIMD::g_signMask), XIsPositive);
    R2 = R0 + R1;
    Result = Vector::Select(Result, R2, ATanResultValid);
    return Result;
}

FORCE_INLINE void Vector::SinCos(Vector& sin, Vector& cos, float angle)
{
    return SinCos(sin, cos, Vector(angle));
}

FORCE_INLINE void Vector::SinCos(Vector& sin, Vector& cos, const Vector& angle)
{
    // Force the value within the bounds of pi
    auto m_x = Vector::AngleMod2Pi(angle);

    // Map in [-pi/2,pi/2] with sin(m_y) = sin(m_x), cos(m_y) = sign*cos(m_x).
    auto sign = _mm_and_ps(m_x, SIMD::g_signMask);
    __m128 c = _mm_or_ps(Vector::Pi, sign); // pi when m_x >= 0, -pi when m_x < 0
    __m128 absx = _mm_andnot_ps(sign, m_x); // |m_x|
    __m128 rflx = _mm_sub_ps(c, m_x);
    __m128 comp = _mm_cmple_ps(absx, Vector::PiDivTwo);
    __m128 select0 = _mm_and_ps(comp, m_x);
    __m128 select1 = _mm_andnot_ps(comp, rflx);
    m_x = _mm_or_ps(select0, select1);
    select0 = _mm_and_ps(comp, Vector::One);
    select1 = _mm_andnot_ps(comp, Vector::NegativeOne);
    sign = _mm_or_ps(select0, select1);

    __m128 x2 = _mm_mul_ps(m_x, m_x);

    // Compute polynomial approximation of sine
    const auto SC1 = SIMD::g_sinCoefficients1;
    auto vConstants = _mm_shuffle_ps(SC1, SC1, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 Result = _mm_mul_ps(vConstants, x2);
    const auto SC0 = SIMD::g_sinCoefficients0;
    vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(3, 3, 3, 3));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(2, 2, 2, 2));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(1, 1, 1, 1));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(0, 0, 0, 0));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    Result = _mm_add_ps(Result, Vector::One);
    Result = _mm_mul_ps(Result, m_x);
    sin = Result;

    // Compute polynomial approximation of cosine
    const auto CC1 = SIMD::g_cosCoefficients1;
    vConstants = _mm_shuffle_ps(CC1, CC1, _MM_SHUFFLE(0, 0, 0, 0));
    Result = _mm_mul_ps(vConstants, x2);
    const auto CC0 = SIMD::g_cosCoefficients0;
    vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(3, 3, 3, 3));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(2, 2, 2, 2));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(1, 1, 1, 1));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(0, 0, 0, 0));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    Result = _mm_add_ps(Result, Vector::One);
    Result = _mm_mul_ps(Result, sign);
    cos = Result;
}

FORCE_INLINE Vector Vector::AngleMod2Pi(const Vector& angles)
{
    // Modulo the range of the given angles such that -Pi <= Angles < Pi
    Vector result = _mm_mul_ps(angles, Vector::OneDivTwoPi);
    result.Round();
    result = _mm_mul_ps(result, Vector::TwoPi);
    result = _mm_sub_ps(angles, result);
    return result;
}

FORCE_INLINE Vector::operator __m128& () { return m_data; }
FORCE_INLINE Vector::operator const __m128& () const { return m_data; }

FORCE_INLINE Vector::Vector() { }

FORCE_INLINE Vector::Vector(Axis axis)
{
    switch (axis)
    {
        case Axis::X: *this = Vector::UnitX; break;
        case Axis::Y: *this = Vector::UnitY; break;
        case Axis::Z: *this = Vector::UnitZ; break;
        default: HALT(); break;
    }
}

FORCE_INLINE Vector::Vector(ZeroInit_t) { memset(this, 0, sizeof(Vector)); }
FORCE_INLINE Vector::Vector(float v) { m_data = _mm_set1_ps(v); }
FORCE_INLINE Vector::Vector(__m128 v) : m_data(v) { }
FORCE_INLINE Vector::Vector(float ix, float iy, float iz, float iw) { m_data = _mm_set_ps(iw, iz, iy, ix); }
FORCE_INLINE Vector::Vector(const Float2& v, float iz, float iw) { m_data = _mm_set_ps(iw, iz, v.m_y, v.m_x); }
FORCE_INLINE Vector::Vector(const Float3& v, float iw) { m_data = _mm_set_ps(iw, v.m_z, v.m_y, v.m_x); }
FORCE_INLINE Vector::Vector(const Float4& v) { m_data = _mm_loadu_ps(&v.m_x); }
FORCE_INLINE Vector::Vector(const float* pValues) { m_data = _mm_loadu_ps(pValues); }

FORCE_INLINE bool Vector::IsValid() const { return !IsNaN4() && !IsInfinite4(); }

FORCE_INLINE void Vector::Store(float* pValues) const { _mm_storeu_ps(pValues, m_data); }
FORCE_INLINE void Vector::StoreFloat(float& value) const { _mm_store_ss(&value, m_data); }

FORCE_INLINE void Vector::StoreFloat2(Float2& value) const
{
    auto yVec = _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1));
    _mm_store_ss(&value.m_x, m_data);
    _mm_store_ss(&value.m_y, yVec);
}

FORCE_INLINE void Vector::StoreFloat3(Float3& value) const
{
    auto yVec = _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1));
    auto zVec = _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2));
    _mm_store_ss(&value.m_x, m_data);
    _mm_store_ss(&value.m_y, yVec);
    _mm_store_ss(&value.m_z, zVec);
}

FORCE_INLINE void Vector::StoreFloat4(Float4& value) const { _mm_storeu_ps(&value.m_x, m_data); }

FORCE_INLINE float Vector::ToFloat() const { float v; StoreFloat(v); return v; }
FORCE_INLINE Float2 Vector::ToFloat2() const { Float2 v; StoreFloat2(v); return v; }
FORCE_INLINE Float3 Vector::ToFloat3() const { Float3 v; StoreFloat3(v); return v; }
FORCE_INLINE Float4 Vector::ToFloat4() const { Float4 v; StoreFloat4(v); return v; }

FORCE_INLINE Vector::operator Float2() const { return ToFloat2(); }
FORCE_INLINE Vector::operator Float3() const { return ToFloat3(); }
FORCE_INLINE Vector::operator Float4() const { return ToFloat4(); }

FORCE_INLINE float Vector::GetX() const { return _mm_cvtss_f32(m_data); }
FORCE_INLINE float Vector::GetY() const { auto vTemp = GetSplatY(); return _mm_cvtss_f32(vTemp); }
FORCE_INLINE float Vector::GetZ() const { auto vTemp = GetSplatZ(); return _mm_cvtss_f32(vTemp); }
FORCE_INLINE float Vector::GetW() const { auto vTemp = GetSplatW(); return _mm_cvtss_f32(vTemp); }

FORCE_INLINE void Vector::SetX(float x) { m_data = _mm_move_ss(m_data, _mm_set_ss(x)); }
FORCE_INLINE void Vector::SetY(float y) { m_data = _mm_insert_ps(m_data, _mm_set_ss(y), 0x10); }
FORCE_INLINE void Vector::SetZ(float z) { m_data = _mm_insert_ps(m_data, _mm_set_ss(z), 0x20); }
FORCE_INLINE void Vector::SetW(float w) { m_data = _mm_insert_ps(m_data, _mm_set_ss(w), 0x30); }

FORCE_INLINE float Vector::operator[](uint32_t i) const
{
    ASSERT(i < 4);
    switch (i)
    {
        case 0: return GetX(); break;
        case 1: return GetY(); break;
        case 2: return GetZ(); break;
        case 3: return GetW(); break;
    }
    UNREACHABLE_CODE();
    return 0.0f;
}

FORCE_INLINE bool Vector::IsW1() const { return GetSplatW().IsEqual4(Vector::One); }
FORCE_INLINE bool Vector::IsW0() const { return GetSplatW().IsZero4(); }
FORCE_INLINE Vector& Vector::SetW0() { SetW(0.0f); return *this; }
FORCE_INLINE Vector& Vector::SetW1() { SetW(1.0f); return *this; }
FORCE_INLINE Vector Vector::GetWithW0() const { Vector v = *this; v.SetW0(); return v; }
FORCE_INLINE Vector Vector::GetWithW1() const { Vector v = *this; v.SetW1(); return v; }
FORCE_INLINE Vector Vector::Get2D() const { return Vector::Select(*this, Vector::Zero, Vector::Select0011); }
FORCE_INLINE Vector Vector::Get3D() const { return Vector::Select(*this, Vector::Zero, Vector::Select0001); }

FORCE_INLINE Vector Vector::operator+(const Vector& v) const { return _mm_add_ps(m_data, v); }
FORCE_INLINE Vector& Vector::operator+=(const Vector& v) { m_data = _mm_add_ps(m_data, v); return *this; }
FORCE_INLINE Vector Vector::operator-(const Vector& v) const { return _mm_sub_ps(m_data, v); }
FORCE_INLINE Vector& Vector::operator-=(const Vector& v) { m_data = _mm_sub_ps(m_data, v); return *this; }
FORCE_INLINE Vector Vector::operator*(const Vector& v) const { return _mm_mul_ps(m_data, v); }
FORCE_INLINE Vector& Vector::operator*=(const Vector& v) { m_data = _mm_mul_ps(m_data, v); return *this; }
FORCE_INLINE Vector Vector::operator/(const Vector& v) const { return _mm_div_ps(m_data, v); }
FORCE_INLINE Vector& Vector::operator/=(const Vector& v) { m_data = _mm_div_ps(m_data, v); return *this; }
FORCE_INLINE Vector Vector::operator*(float const f) const { return operator*(Vector(f)); }
FORCE_INLINE Vector& Vector::operator*=(float const f) { return operator*=(Vector(f)); }
FORCE_INLINE Vector Vector::operator/(float const f) const { return operator/(Vector(f)); }
FORCE_INLINE Vector& Vector::operator/=(float const f) { return operator/=(Vector(f)); }
FORCE_INLINE Vector Vector::operator-() const { return GetNegated(); }

FORCE_INLINE Vector Vector::Orthogonal2D() const
{
    static Vector const negX(-1.0f, 1.0f, 1.0f, 1.0f);
    Vector result;
    result = _mm_shuffle_ps(*this, *this, _MM_SHUFFLE(3, 2, 0, 1));
    result = _mm_mul_ps(result, negX);
    return result;
}

FORCE_INLINE Vector Vector::Cross2(const Vector& other) const
{
    Vector vResult = _mm_shuffle_ps(other.m_data, other.m_data, _MM_SHUFFLE(0, 1, 0, 1));
    vResult = _mm_mul_ps(vResult, m_data);
    Vector vTemp = vResult.GetSplatY();
    vResult = _mm_sub_ss(vResult, vTemp);
    vResult = vResult.GetSplatX();
    return vResult;
}

FORCE_INLINE Vector Vector::Cross3(const Vector& other) const
{
    auto vTemp1 = _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 0, 2, 1));
    auto vTemp2 = _mm_shuffle_ps(other, other, _MM_SHUFFLE(3, 1, 0, 2));
    Vector result = _mm_mul_ps(vTemp1, vTemp2);
    vTemp1 = _mm_shuffle_ps(vTemp1, vTemp1, _MM_SHUFFLE(3, 0, 2, 1));
    vTemp2 = _mm_shuffle_ps(vTemp2, vTemp2, _MM_SHUFFLE(3, 1, 0, 2));
    vTemp1 = _mm_mul_ps(vTemp1, vTemp2);
    result = _mm_sub_ps(result, vTemp1);
    result = _mm_and_ps(result, SIMD::g_maskXYZ0);
    return result;
}

FORCE_INLINE Vector Vector::Dot2(const Vector& other) const
{
    // Perform the dot product on m_x and m_y
    Vector result = _mm_mul_ps(m_data, other);
    // vTemp has m_y splatted
    auto vTemp = _mm_shuffle_ps(result, result, _MM_SHUFFLE(1, 1, 1, 1));
    // m_x+m_y
    result = _mm_add_ss(result, vTemp);
    result = _mm_shuffle_ps(result, result, _MM_SHUFFLE(0, 0, 0, 0));
    return result;
}

FORCE_INLINE Vector Vector::Dot3(const Vector& vOther) const
{
    // Perform the dot product
    auto vDot = _mm_mul_ps(m_data, vOther);
    // m_x=Dot.vector4_f32[1], m_y=Dot.vector4_f32[2]
    auto vTemp = _mm_shuffle_ps(vDot, vDot, _MM_SHUFFLE(2, 1, 2, 1));
    // Result.vector4_f32[0] = m_x+m_y
    vDot = _mm_add_ss(vDot, vTemp);
    // m_x=Dot.vector4_f32[2]
    vTemp = _mm_shuffle_ps(vTemp, vTemp, _MM_SHUFFLE(1, 1, 1, 1));
    // Result.vector4_f32[0] = (m_x+m_y)+m_z
    vDot = _mm_add_ss(vDot, vTemp);
    // Splat m_x
    Vector result = _mm_shuffle_ps(vDot, vDot, _MM_SHUFFLE(0, 0, 0, 0));
    return result;
}

FORCE_INLINE Vector Vector::Dot4(const Vector& other) const
{
    auto vTemp2 = other;
    auto vTemp = _mm_mul_ps(m_data, vTemp2);
    vTemp2 = _mm_shuffle_ps(vTemp2, vTemp, _MM_SHUFFLE(1, 0, 0, 0)); // Copy X to the Z position and Y to the W position
    vTemp2 = _mm_add_ps(vTemp2, vTemp);                              // Add Z = X+Z; W = Y+W;
    vTemp = _mm_shuffle_ps(vTemp, vTemp2, _MM_SHUFFLE(0, 3, 0, 0));  // Copy W to the Z position
    vTemp = _mm_add_ps(vTemp, vTemp2);                               // Add Z and W together
    return _mm_shuffle_ps(vTemp, vTemp, _MM_SHUFFLE(2, 2, 2, 2));    // Splat Z and return
}

FORCE_INLINE float Vector::GetDot2(const Vector& other) const { return Dot2(other).ToFloat(); }
FORCE_INLINE float Vector::GetDot3(const Vector& other) const { return Dot3(other).ToFloat(); }
FORCE_INLINE float Vector::GetDot4(const Vector& other) const { return Dot4(other).ToFloat(); }

FORCE_INLINE Vector Vector::ScalarProjection(const Vector& other) const
{
    Vector const normalizedThis = GetNormalized3();
    Vector const projection = other.Dot3(normalizedThis);
    return projection;
}

FORCE_INLINE float Vector::GetScalarProjection(const Vector& other) const { return ScalarProjection(other).ToFloat(); }

FORCE_INLINE Vector Vector::VectorProjection(const Vector& other) const
{
    Vector const normalizedThis = GetNormalized3();
    Vector const dotOther = other.Dot3(normalizedThis);
    Vector const projection = normalizedThis * dotOther;
    return projection;
}

FORCE_INLINE Vector& Vector::Invert() { m_data = _mm_div_ps(Vector::One, m_data); return *this; }
FORCE_INLINE Vector Vector::GetInverse() const { return _mm_div_ps(Vector::One, m_data); }
FORCE_INLINE Vector Vector::GetReciprocal() const { return GetInverse(); }
FORCE_INLINE Vector& Vector::InvertEst() { m_data = _mm_rcp_ps(m_data); return *this; }
FORCE_INLINE Vector Vector::GetInverseEst() const { return _mm_rcp_ps(m_data); }
FORCE_INLINE Vector& Vector::Negate() { m_data = _mm_sub_ps(Vector::Zero, m_data); return *this; }
FORCE_INLINE Vector Vector::GetNegated() const { return _mm_sub_ps(Vector::Zero, m_data); }
FORCE_INLINE Vector& Vector::Abs() { m_data = _mm_max_ps(_mm_sub_ps(Vector::Zero, m_data), m_data); return *this; }
FORCE_INLINE Vector Vector::GetAbs() const { return _mm_max_ps(_mm_sub_ps(Vector::Zero, m_data), m_data); }
FORCE_INLINE Vector& Vector::Sqrt() { m_data = _mm_sqrt_ps(m_data); return *this; }
FORCE_INLINE Vector Vector::GetSqrt() { return _mm_sqrt_ps(m_data); }
FORCE_INLINE Vector& Vector::ReciprocalSqrt() { m_data = _mm_div_ps(Vector::One, _mm_sqrt_ps(m_data)); return *this; }
FORCE_INLINE Vector Vector::GetReciprocalSqrt() { return _mm_div_ps(Vector::One, _mm_sqrt_ps(m_data)); }
FORCE_INLINE Vector& Vector::EstimatedReciprocalSqrt() { m_data = _mm_rsqrt_ps(m_data); return *this; }
FORCE_INLINE Vector Vector::GetEstimatedReciprocalSqrt() { return _mm_rsqrt_ps(m_data); }

FORCE_INLINE Vector& Vector::Normalize2()
{
    // Perform the dot product on m_x and m_y only
    auto vLengthSq = _mm_mul_ps(m_data, m_data);
    auto vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 1, 1, 1));
    vLengthSq = _mm_add_ss(vLengthSq, vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(0, 0, 0, 0));
    // Prepare for the division
    auto vResult = _mm_sqrt_ps(vLengthSq);
    // Create zero with a single instruction
    auto vZeroMask = _mm_setzero_ps();
    // Test for a divide by zero (Must be FP to detect -0.0)
    vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq, Vector::Infinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(m_data, vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult, vZeroMask);
    // Select qnan or result based on infinite length
    auto vTemp1 = _mm_andnot_ps(vLengthSq, Vector::QNaN);
    auto vTemp2 = _mm_and_ps(vResult, vLengthSq);
    m_data = _mm_or_ps(vTemp1, vTemp2);
    *this = Select(*this, Vector::Zero, Select0011);
    return *this;
}

FORCE_INLINE Vector& Vector::Normalize3()
{
    // Perform the dot product on m_x,m_y and m_z only
    auto vLengthSq = _mm_mul_ps(m_data, m_data);
    auto vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(2, 1, 2, 1));
    vLengthSq = _mm_add_ss(vLengthSq, vTemp);
    vTemp = _mm_shuffle_ps(vTemp, vTemp, _MM_SHUFFLE(1, 1, 1, 1));
    vLengthSq = _mm_add_ss(vLengthSq, vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(0, 0, 0, 0));
    // Prepare for the division
    auto vResult = _mm_sqrt_ps(vLengthSq);
    // Create zero with a single instruction
    auto vZeroMask = _mm_setzero_ps();
    // Test for a divide by zero (Must be FP to detect -0.0)
    vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq, Vector::Infinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(m_data, vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult, vZeroMask);
    // Select qnan or result based on infinite length
    auto vTemp1 = _mm_andnot_ps(vLengthSq, Vector::QNaN);
    auto vTemp2 = _mm_and_ps(vResult, vLengthSq);
    m_data = _mm_or_ps(vTemp1, vTemp2);
    *this = Select(*this, Vector::Zero, Select0001);
    return *this;
}

FORCE_INLINE Vector& Vector::Normalize4()
{
    // Perform the dot product on m_x,m_y,m_z and m_w
    auto vLengthSq = _mm_mul_ps(m_data, m_data);
    // vTemp has m_z and m_w
    auto vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(3, 2, 3, 2));
    // m_x+m_z, m_y+m_w
    vLengthSq = _mm_add_ps(vLengthSq, vTemp);
    // m_x+m_z,m_x+m_z,m_x+m_z,m_y+m_w
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 0, 0, 0));
    // ??,??,m_y+m_w,m_y+m_w
    vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0));
    // ??,??,m_x+m_z+m_y+m_w,??
    vLengthSq = _mm_add_ps(vLengthSq, vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(2, 2, 2, 2));
    // Prepare for the division
    auto vResult = _mm_sqrt_ps(vLengthSq);
    // Create zero with a single instruction
    auto vZeroMask = _mm_setzero_ps();
    // Test for a divide by zero (Must be FP to detect -0.0)
    vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq, Vector::Infinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(m_data, vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult, vZeroMask);
    // Select qnan or result based on infinite length
    auto vTemp1 = _mm_andnot_ps(vLengthSq, Vector::QNaN);
    auto vTemp2 = _mm_and_ps(vResult, vLengthSq);
    m_data = _mm_or_ps(vTemp1, vTemp2);
    return *this;
}

FORCE_INLINE Vector Vector::GetNormalized2() const { Vector v = *this; v.Normalize2(); return v; }
FORCE_INLINE Vector Vector::GetNormalized3() const { Vector v = *this; v.Normalize3(); return v; }
FORCE_INLINE Vector Vector::GetNormalized4() const { Vector v = *this; v.Normalize4(); return v; }

FORCE_INLINE Vector& Vector::Floor()
{
    Vector result;
    // To handle NAN, INF and numbers greater than 8388608, use masking
    __m128i vTest = _mm_and_si128(_mm_castps_si128(m_data), SIMD::g_absMask);
    vTest = _mm_cmplt_epi32(vTest, SIMD::g_noFraction);
    // Truncate
    __m128i vInt = _mm_cvttps_epi32(m_data);
    result = _mm_cvtepi32_ps(vInt);
    __m128 vLarger = _mm_cmpgt_ps(result, m_data);
    // 0 -> 0, 0xffffffff -> -1.0f
    vLarger = _mm_cvtepi32_ps(_mm_castps_si128(vLarger));
    result = _mm_add_ps(result, vLarger);
    // All numbers less than 8388608 will use the round to int
    result = _mm_and_ps(result, _mm_castsi128_ps(vTest));
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest, _mm_castps_si128(m_data));
    result = _mm_or_ps(result, _mm_castsi128_ps(vTest));
    m_data = result;
    return *this;
}

FORCE_INLINE Vector Vector::GetFloor() const { Vector v = *this; v.Floor(); return v; }

FORCE_INLINE Vector& Vector::Ceil()
{
    Vector result;
    // To handle NAN, INF and numbers greater than 8388608, use masking
    __m128i vTest = _mm_and_si128(_mm_castps_si128(m_data), SIMD::g_absMask);
    vTest = _mm_cmplt_epi32(vTest, SIMD::g_noFraction);
    // Truncate
    __m128i vInt = _mm_cvttps_epi32(m_data);
    result = _mm_cvtepi32_ps(vInt);
    __m128 vSmaller = _mm_cmplt_ps(result, m_data);
    // 0 -> 0, 0xffffffff -> -1.0f
    vSmaller = _mm_cvtepi32_ps(_mm_castps_si128(vSmaller));
    result = _mm_sub_ps(result, vSmaller);
    // All numbers less than 8388608 will use the round to int
    result = _mm_and_ps(result, _mm_castsi128_ps(vTest));
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest, _mm_castps_si128(m_data));
    result = _mm_or_ps(result, _mm_castsi128_ps(vTest));
    m_data = result;
    return *this;
}

FORCE_INLINE Vector Vector::GetCeil() const { Vector v = *this; v.Ceil(); return v; }

FORCE_INLINE Vector& Vector::Round()
{
    __m128 sign = _mm_and_ps(m_data, SIMD::g_signMask);
    __m128 sMagic = _mm_or_ps(SIMD::g_noFraction, sign);
    __m128 R1 = _mm_add_ps(m_data, sMagic);
    R1 = _mm_sub_ps(R1, sMagic);
    __m128 R2 = _mm_and_ps(m_data, SIMD::g_absMask);
    __m128 mask = _mm_cmple_ps(R2, SIMD::g_noFraction);
    R2 = _mm_andnot_ps(mask, m_data);
    R1 = _mm_and_ps(R1, mask);
    m_data = _mm_xor_ps(R1, R2);
    return *this;
}

FORCE_INLINE Vector Vector::GetRound() const { Vector v = *this; v.Round(); return v; }
FORCE_INLINE Vector Vector::GetSign() const
{
    Vector const selectMask = GreaterThanEqual(Vector::Zero);
    return Vector::Select(Vector::NegativeOne, Vector::One, selectMask);
}

FORCE_INLINE Vector Vector::GetSplatX() const { return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(0, 0, 0, 0)); }
FORCE_INLINE Vector Vector::GetSplatY() const { return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1)); }
FORCE_INLINE Vector Vector::GetSplatZ() const { return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2)); }
FORCE_INLINE Vector Vector::GetSplatW() const { return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3)); }

template <uint32_t xIdx, uint32_t yIdx, uint32_t zIdx, uint32_t wIdx>
FORCE_INLINE Vector Vector::Swizzle() const
{
    static_assert(xIdx < 4, "Element index parameter out of range");
    static_assert(yIdx < 4, "Element index parameter out of range");
    static_assert(zIdx < 4, "Element index parameter out of range");
    static_assert(wIdx < 4, "Element index parameter out of range");
    return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(wIdx, zIdx, yIdx, xIdx));
}

FORCE_INLINE Vector Vector::Swizzle(uint32_t xIdx, uint32_t yIdx, uint32_t zIdx, uint32_t wIdx) const
{
    ASSERT(xIdx < 4 && yIdx < 4 && zIdx < 4 && wIdx < 4);
    uint32_t const elem[4] = { xIdx, yIdx, zIdx, wIdx };
    __m128i vControl = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&elem[0]));
    return _mm_permutevar_ps(m_data, vControl);
}

FORCE_INLINE Vector Vector::Shuffle(uint32_t xIdx, uint32_t yIdx, uint32_t zIdx, uint32_t wIdx) const
{
    return Swizzle(xIdx, yIdx, zIdx, wIdx);
}

template <uint32_t xIdx, uint32_t yIdx, uint32_t zIdx, uint32_t wIdx>
FORCE_INLINE Vector Vector::Shuffle() const
{
    return Swizzle<xIdx, yIdx, zIdx, wIdx>();
}

FORCE_INLINE Vector Vector::Length2() const
{
    Vector result;
    result = _mm_mul_ps(m_data, m_data);
    auto vTemp = _mm_shuffle_ps(result, result, _MM_SHUFFLE(1, 1, 1, 1));
    // m_x+m_y
    result = _mm_add_ss(result, vTemp);
    result = _mm_shuffle_ps(result, result, _MM_SHUFFLE(0, 0, 0, 0));
    result = _mm_sqrt_ps(result);
    return result;
}

FORCE_INLINE Vector Vector::Length3() const
{
    Vector result;
    // Perform the dot product on m_x,m_y and m_z
    result = _mm_mul_ps(m_data, m_data);
    // vTemp has m_z and m_y
    auto vTemp = _mm_shuffle_ps(result, result, _MM_SHUFFLE(1, 2, 1, 2));
    // m_x+m_z, m_y
    result = _mm_add_ss(result, vTemp);
    // m_y,m_y,m_y,m_y
    vTemp = _mm_shuffle_ps(vTemp, vTemp, _MM_SHUFFLE(1, 1, 1, 1));
    // m_x+m_z+m_y,??,??,??
    result = _mm_add_ss(result, vTemp);
    // Splat the length squared
    result = _mm_shuffle_ps(result, result, _MM_SHUFFLE(0, 0, 0, 0));
    // Get the length
    result = _mm_sqrt_ps(result);
    return result;
}

FORCE_INLINE Vector Vector::Length4() const
{
    Vector result;
    // Perform the dot product on m_x,m_y,m_z and m_w
    result = _mm_mul_ps(m_data, m_data);
    // vTemp has m_z and m_w
    auto vTemp = _mm_shuffle_ps(result, result, _MM_SHUFFLE(3, 2, 3, 2));
    // m_x+m_z, m_y+m_w
    result = _mm_add_ps(result, vTemp);
    // m_x+m_z,m_x+m_z,m_x+m_z,m_y+m_w
    result = _mm_shuffle_ps(result, result, _MM_SHUFFLE(1, 0, 0, 0));
    // ??,??,m_y+m_w,m_y+m_w
    vTemp = _mm_shuffle_ps(vTemp, result, _MM_SHUFFLE(3, 3, 0, 0));
    // ??,??,m_x+m_z+m_y+m_w,??
    result = _mm_add_ps(result, vTemp);
    // Splat the length
    result = _mm_shuffle_ps(result, result, _MM_SHUFFLE(2, 2, 2, 2));
    // Get the length
    result = _mm_sqrt_ps(result);
    return result;
}

FORCE_INLINE float Vector::GetLength2() const { return Length2().GetX(); }
FORCE_INLINE float Vector::GetLength3() const { return Length3().GetX(); }
FORCE_INLINE float Vector::GetLength4() const { return Length4().GetX(); }

FORCE_INLINE Vector Vector::InverseLength2() const
{
    // Perform the dot product on m_x and m_y
    auto vLengthSq = _mm_mul_ps(m_data, m_data);
    // vTemp has m_y splatted
    auto vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 1, 1, 1));
    // m_x+m_y
    vLengthSq = _mm_add_ss(vLengthSq, vTemp);
    vLengthSq = _mm_sqrt_ss(vLengthSq);
    vLengthSq = _mm_div_ss(Vector::One, vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(0, 0, 0, 0));
    return vLengthSq;
}

FORCE_INLINE Vector Vector::InverseLength3() const
{
    // Perform the dot product
    auto vDot = _mm_mul_ps(m_data, m_data);
    // m_x=Dot.m_y, m_y=Dot.m_z
    auto vTemp = _mm_shuffle_ps(vDot, vDot, _MM_SHUFFLE(2, 1, 2, 1));
    // Result.m_x = m_x+m_y
    vDot = _mm_add_ss(vDot, vTemp);
    // m_x=Dot.m_z
    vTemp = _mm_shuffle_ps(vTemp, vTemp, _MM_SHUFFLE(1, 1, 1, 1));
    // Result.m_x = (m_x+m_y)+m_z
    vDot = _mm_add_ss(vDot, vTemp);
    // Splat m_x
    vDot = _mm_shuffle_ps(vDot, vDot, _MM_SHUFFLE(0, 0, 0, 0));
    // Get the reciprocal
    vDot = _mm_sqrt_ps(vDot);
    // Get the reciprocal
    vDot = _mm_div_ps(Vector::One, vDot);
    return vDot;
}

FORCE_INLINE Vector Vector::InverseLength4() const
{
    // Perform the dot product on m_x,m_y,m_z and m_w
    auto vLengthSq = _mm_mul_ps(m_data, m_data);
    // vTemp has m_z and m_w
    auto vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(3, 2, 3, 2));
    // m_x+m_z, m_y+m_w
    vLengthSq = _mm_add_ps(vLengthSq, vTemp);
    // m_x+m_z,m_x+m_z,m_x+m_z,m_y+m_w
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 0, 0, 0));
    // ??,??,m_y+m_w,m_y+m_w
    vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0));
    // ??,??,m_x+m_z+m_y+m_w,??
    vLengthSq = _mm_add_ps(vLengthSq, vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(2, 2, 2, 2));
    // Get the reciprocal
    vLengthSq = _mm_sqrt_ps(vLengthSq); // Accurate!
    vLengthSq = _mm_div_ps(Vector::One, vLengthSq);
    return vLengthSq;
}

FORCE_INLINE float Vector::GetInverseLength2() const { return InverseLength2().GetX(); }
FORCE_INLINE float Vector::GetInverseLength3() const { return InverseLength3().GetX(); }
FORCE_INLINE float Vector::GetInverseLength4() const { return InverseLength4().GetX(); }

FORCE_INLINE Vector Vector::LengthSquared2() const { return Vector::Dot2(m_data, m_data); }
FORCE_INLINE Vector Vector::LengthSquared3() const { return Vector::Dot3(m_data, m_data); }
FORCE_INLINE Vector Vector::LengthSquared4() const { return Vector::Dot4(m_data, m_data); }

FORCE_INLINE float Vector::GetLengthSquared2() const { return LengthSquared2().GetX(); }
FORCE_INLINE float Vector::GetLengthSquared3() const { return LengthSquared3().GetX(); }
FORCE_INLINE float Vector::GetLengthSquared4() const { return LengthSquared4().GetX(); }

FORCE_INLINE Vector Vector::Distance2(const Vector& to) const { return (to - *this).Length2(); }
FORCE_INLINE Vector Vector::Distance3(const Vector& to) const { return (to - *this).Length3(); }
FORCE_INLINE Vector Vector::Distance4(const Vector& to) const { return (to - *this).Length4(); }

FORCE_INLINE float Vector::GetDistance2(const Vector& to) const { return (to - *this).Length2().GetX(); }
FORCE_INLINE float Vector::GetDistance3(const Vector& to) const { return (to - *this).Length3().GetX(); }
FORCE_INLINE float Vector::GetDistance4(const Vector& to) const { return (to - *this).Length4().GetX(); }

FORCE_INLINE Vector Vector::DistanceSquared2(const Vector& to) const { return (to - *this).LengthSquared2(); }
FORCE_INLINE Vector Vector::DistanceSquared3(const Vector& to) const { return (to - *this).LengthSquared3(); }
FORCE_INLINE Vector Vector::DistanceSquared4(const Vector& to) const { return (to - *this).LengthSquared4(); }

FORCE_INLINE float Vector::GetDistanceSquared2(const Vector& to) const { return (to - *this).GetLengthSquared2(); }
FORCE_INLINE float Vector::GetDistanceSquared3(const Vector& to) const { return (to - *this).GetLengthSquared3(); }
FORCE_INLINE float Vector::GetDistanceSquared4(const Vector& to) const { return (to - *this).GetLengthSquared4(); }

FORCE_INLINE bool Vector::IsNormalized2() const { return (LengthSquared2() - Vector::One).Abs().IsLessThanEqual4(Vector::NormalizeCheckThreshold); }
FORCE_INLINE bool Vector::IsNormalized3() const { return (LengthSquared3() - Vector::One).Abs().IsLessThanEqual4(Vector::NormalizeCheckThreshold); }
FORCE_INLINE bool Vector::IsNormalized4() const { return (LengthSquared4() - Vector::One).Abs().IsLessThanEqual4(Vector::NormalizeCheckThreshold); }

FORCE_INLINE Vector Vector::InBounds(const Vector& bounds) const
{
    // Test if less than or equal
    auto vTemp1 = _mm_cmple_ps(m_data, bounds);
    // Negate the bounds
    auto vTemp2 = _mm_mul_ps(bounds, Vector::NegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2, m_data);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1, vTemp2);
    return vTemp1;
}

FORCE_INLINE bool Vector::IsInBounds2(const Vector& bounds) const { return ((_mm_movemask_ps(InBounds(bounds)) & 0x3) == 0x3) != 0; }
FORCE_INLINE bool Vector::IsInBounds3(const Vector& bounds) const { return ((_mm_movemask_ps(InBounds(bounds)) & 0x7) == 0x7) != 0; }
FORCE_INLINE bool Vector::IsInBounds4(const Vector& bounds) const { return (_mm_movemask_ps(InBounds(bounds)) == 0x0f) != 0; }

FORCE_INLINE Vector Vector::Equal(const Vector& v) const { return _mm_cmpeq_ps(*this, v); }
FORCE_INLINE bool Vector::IsEqual2(const Vector& v) const { return (((_mm_movemask_ps(Equal(v)) & 3) == 3) != 0); }
FORCE_INLINE bool Vector::IsEqual3(const Vector& v) const { return (((_mm_movemask_ps(Equal(v)) & 7) == 7) != 0); }
FORCE_INLINE bool Vector::IsEqual4(const Vector& v) const { return ((_mm_movemask_ps(Equal(v)) == 0x0f) != 0); }

FORCE_INLINE Vector Vector::NearEqual(const Vector& v, const Vector& epsilon) const
{
    // Get the difference
    auto vDelta = _mm_sub_ps(m_data, v);
    // Get the absolute value of the difference
    auto vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp, vDelta);
    vTemp = _mm_max_ps(vTemp, vDelta);
    vTemp = _mm_cmple_ps(vTemp, epsilon);
    return vTemp;
}

FORCE_INLINE bool Vector::IsNearEqual2(const Vector& v, float epsilon) const { return IsNearEqual2(v, Vector(epsilon)); }
FORCE_INLINE bool Vector::IsNearEqual3(const Vector& v, float epsilon) const { return IsNearEqual3(v, Vector(epsilon)); }
FORCE_INLINE bool Vector::IsNearEqual4(const Vector& v, float epsilon) const { return IsNearEqual4(v, Vector(epsilon)); }
FORCE_INLINE bool Vector::IsNearEqual2(const Vector& v, const Vector& epsilon) const { return (((_mm_movemask_ps(NearEqual(v, epsilon)) & 3) == 0x3) != 0); }
FORCE_INLINE bool Vector::IsNearEqual3(const Vector& v, const Vector& epsilon) const { return (((_mm_movemask_ps(NearEqual(v, epsilon)) & 7) == 0x7) != 0); }
FORCE_INLINE bool Vector::IsNearEqual4(const Vector& v, const Vector& epsilon) const { return ((_mm_movemask_ps(NearEqual(v, epsilon)) == 0xf) != 0); }

FORCE_INLINE Vector Vector::GreaterThan(const Vector& v) const { return _mm_cmpgt_ps(m_data, v); }
FORCE_INLINE bool Vector::IsAnyGreaterThan(const Vector& v) const { return !GreaterThan(v).IsZero4(); }
FORCE_INLINE bool Vector::IsGreaterThan2(const Vector& v) const { return (((_mm_movemask_ps(GreaterThan(v)) & 3) == 3) != 0); }
FORCE_INLINE bool Vector::IsGreaterThan3(const Vector& v) const { return (((_mm_movemask_ps(GreaterThan(v)) & 7) == 7) != 0); }
FORCE_INLINE bool Vector::IsGreaterThan4(const Vector& v) const { return ((_mm_movemask_ps(GreaterThan(v)) == 0x0f) != 0); }

FORCE_INLINE Vector Vector::GreaterThanEqual(const Vector& v) const { return _mm_cmpge_ps(m_data, v); }
FORCE_INLINE bool Vector::IsAnyGreaterThanEqual(const Vector& v) const { return !GreaterThanEqual(v).IsZero4(); }
FORCE_INLINE bool Vector::IsGreaterThanEqual2(const Vector& v) const { return ((_mm_movemask_ps(GreaterThanEqual(v)) & 3) == 3) != 0; }
FORCE_INLINE bool Vector::IsGreaterThanEqual3(const Vector& v) const { return ((_mm_movemask_ps(GreaterThanEqual(v)) & 7) == 7) != 0; }
FORCE_INLINE bool Vector::IsGreaterThanEqual4(const Vector& v) const { return (_mm_movemask_ps(GreaterThanEqual(v)) == 0x0f) != 0; }

FORCE_INLINE Vector Vector::LessThan(const Vector& v) const { return _mm_cmplt_ps(m_data, v); }
FORCE_INLINE bool Vector::IsAnyLessThan(const Vector& v) const { return !LessThan(v).IsZero4(); }
FORCE_INLINE bool Vector::IsLessThan2(const Vector& v) const { return (((_mm_movemask_ps(LessThan(v)) & 3) == 3) != 0); }
FORCE_INLINE bool Vector::IsLessThan3(const Vector& v) const { return (((_mm_movemask_ps(LessThan(v)) & 7) == 7) != 0); }
FORCE_INLINE bool Vector::IsLessThan4(const Vector& v) const { return ((_mm_movemask_ps(LessThan(v)) == 0x0f) != 0); }

FORCE_INLINE Vector Vector::LessThanEqual(const Vector& v) const { return _mm_cmple_ps(m_data, v); }
FORCE_INLINE bool Vector::IsAnyLessThanEqual(const Vector& v) const { return !LessThanEqual(v).IsZero4(); }
FORCE_INLINE bool Vector::IsLessThanEqual2(const Vector& v) const { return (((_mm_movemask_ps(LessThanEqual(v)) & 3) == 3) != 0); }
FORCE_INLINE bool Vector::IsLessThanEqual3(const Vector& v) const { return (((_mm_movemask_ps(LessThanEqual(v)) & 7) == 7) != 0); }
FORCE_INLINE bool Vector::IsLessThanEqual4(const Vector& v) const { return ((_mm_movemask_ps(LessThanEqual(v)) == 0x0f) != 0); }

FORCE_INLINE Vector Vector::EqualsZero() const { return Equal(Vector::Zero); }
FORCE_INLINE bool Vector::IsAnyEqualToZero2() const { return !EqualsZero().IsZero2(); }
FORCE_INLINE bool Vector::IsAnyEqualToZero3() const { return !EqualsZero().IsZero3(); }
FORCE_INLINE bool Vector::IsAnyEqualToZero4() const { return !EqualsZero().IsZero4(); }

FORCE_INLINE bool Vector::IsZero2() const { return IsEqual2(Vector::Zero); }
FORCE_INLINE bool Vector::IsZero3() const { return IsEqual3(Vector::Zero); }
FORCE_INLINE bool Vector::IsZero4() const { return IsEqual4(Vector::Zero); }

FORCE_INLINE Vector Vector::NearEqualsZero(float epsilon) const { return NearEqual(Vector::Zero, Vector(epsilon)); }
FORCE_INLINE bool Vector::IsNearZero2(float epsilon) const { return IsNearEqual2(Vector::Zero, Vector(epsilon)); }
FORCE_INLINE bool Vector::IsNearZero3(float epsilon) const { return IsNearEqual3(Vector::Zero, Vector(epsilon)); }
FORCE_INLINE bool Vector::IsNearZero4(float epsilon) const { return IsNearEqual4(Vector::Zero, Vector(epsilon)); }

FORCE_INLINE Vector Vector::EqualsInfinity() const
{
    __m128 vTemp = _mm_and_ps(m_data, SIMD::g_absMask);
    return _mm_cmpeq_ps(vTemp, Vector::Infinity);
}

FORCE_INLINE bool Vector::IsInfinite2() const { return (_mm_movemask_ps(EqualsInfinity()) & 3) != 0; }
FORCE_INLINE bool Vector::IsInfinite3() const { return (_mm_movemask_ps(EqualsInfinity()) & 7) != 0; }
FORCE_INLINE bool Vector::IsInfinite4() const { return (_mm_movemask_ps(EqualsInfinity()) != 0); }

FORCE_INLINE Vector Vector::EqualsNaN() const { return _mm_cmpneq_ps(m_data, m_data); }
FORCE_INLINE bool Vector::IsNaN2() const { return (_mm_movemask_ps(EqualsNaN()) & 3) != 0; }
FORCE_INLINE bool Vector::IsNaN3() const { return (_mm_movemask_ps(EqualsNaN()) & 7) != 0; }
FORCE_INLINE bool Vector::IsNaN4() const { return (_mm_movemask_ps(EqualsNaN()) != 0); }

FORCE_INLINE bool Vector::IsParallelTo(const Vector& v) const
{
    Vector const vAbsDot = Vector::Dot3(*this, v).GetAbs();
    Vector const vAbsDelta = Vector::One - vAbsDot;
    return vAbsDelta.IsLessThanEqual4(Vector::Epsilon);
}

FORCE_INLINE void Vector::ToDirectionAndLength2(Vector& direction, float& length) const
{
    Vector const vLength = Length2();
    direction = Vector::Select(*this, Vector::Zero, Select0011);
    direction /= vLength;
    length = vLength.ToFloat();
}

FORCE_INLINE void Vector::ToDirectionAndLength3(Vector& direction, float& length) const
{
    Vector const vLength = Length3();
    direction = Vector::Select(*this, Vector::Zero, Select0001);
    direction /= vLength;
    length = vLength.ToFloat();
}

FORCE_INLINE bool Vector::operator==(const Vector& rhs) const { return IsEqual4(rhs); }
FORCE_INLINE bool Vector::operator!=(const Vector& rhs) const { return !IsEqual4(rhs); }
}

================================================
FILE: MotionCorrection/src/cpp/Platform.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

// Finds the current platform
#if defined( __WIN32__ ) || defined( _WIN32 )
#   define PLATFORM_WIN32
#else
#   define PLATFORM_LINUX
#endif

//
// Platform Specific Helpers/Functions
//

// DLL export
#if defined(PLATFORM_WIN32)
// Windows
#   if defined(COMPILER_MSVC)
#       if defined(STATIC_LIB)
#           define API
#       else
#           if defined(API)
#               define API __declspec(dllexport)
#           else
#               define API __declspec(dllimport)
#           endif
#       endif
#   else
#       if defined(STATIC_LIB)
#           define API
#       else
#           if defined(API)
#               define API __attribute__ ((dllexport))
#           else
#               define API __attribute__ ((dllimport))
#           endif
#       endif
#   endif
#   define DISABLE_OPTIMIZATION __pragma( optimize( "", off ) )
#   define ENABLE_OPTIMIZATION __pragma( optimize( "", on ) )
#   define DEBUG_BREAK() // __debugbreak()
#else
// Linux settings
#   include <signal.h>
#   define API __attribute__ ((visibility ("default")))
#   define DISABLE_OPTIMIZATION
#   define ENABLE_OPTIMIZATION
#   define DEBUG_BREAK() // raise(SIGTRAP)
#endif

================================================
FILE: README.md
================================================


## Overview

Kimodo is a **ki**nematic **mo**tion **d**iffusi**o**n model trained on a large-scale (700 hours) commercially-friendly optical motion capture dataset. The model generates high-quality 3D human and robot motions, and is controlled through text prompts and an extensive set of constraints such as full-body pose keyframes, end-effector positions/rotations, 2D paths, and 2D waypoints. Full details of the model architecture and training are available in the [technical report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf).

This repository provides:

- **Inference**: code and CLI to generate motions on both human and robot skeletons
- **Interactive Demo**: easily author motions with a timeline interface of text prompts and kinematic controls
- **Benchmark**: [test cases](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark) and evaluation code built on the [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) dataset to evaluate motion generation models on their text- and constraint-following abilities
- **Annotations**: fine-grained temporal text descriptions created for the Kimodo project, included in the [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) dataset. For more information on these labels, see our separate [Hugging Face repo](https://huggingface.co/datasets/nvidia/SEED-Timeline-Annotations).
## News

See the [full changelog](CHANGELOG.md) for a detailed list of all changes.

- **[2026-05-03]** _FIX_: fixed a bug causing incorrect calculation of averaged metrics for constraint test cases in the benchmark
- **[2026-04-24]** _NEW_: improved multi-prompt generation and better support for small-VRAM GPUs via the `TEXT_ENCODER_DEVICE=cpu` env var
- **[2026-04-10]** Released the [Kimodo Motion Generation Benchmark](#kimodo-motion-generation-benchmark) alongside new v1.1 Kimodo-SOMA models
- **[2026-03-19]** **Breaking:** Model inputs/outputs now use the SOMA 77-joint skeleton (`somaskel77`).
- **[2026-03-16]** Initial open-source release of Kimodo with five model variants (SOMA, G1, SMPL-X), CLI, interactive demo, and timeline annotations for BONES-SEED.

## Kimodo Models

Several variations of Kimodo are available, trained on different skeletons and datasets. All models support text-to-motion and kinematic controls.

> Note: models are downloaded automatically when generating from the CLI or Interactive Demo, so there is no need to download them manually.

| Model | Skeleton | Training Data | Release Date | Hugging Face | License |
|:-------|:-------------|:------:|:------:|:-------------:|:-------------:|
| **Kimodo-SOMA-RP-v1.1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | April 10, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-RP-v1.1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) |
| **Kimodo-SOMA-SEED-v1.1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) | April 10, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-SEED-v1.1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) |
| **Kimodo-SOMA-RP-v1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-RP-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) |
| **Kimodo-G1-RP-v1** | [Unitree G1](https://github.com/unitreerobotics/unitree_mujoco/tree/main/unitree_robots/g1) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-G1-RP-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) |
| **Kimodo-SOMA-SEED-v1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-SEED-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) |
| **Kimodo-G1-SEED-v1** | [Unitree G1](https://github.com/unitreerobotics/unitree_mujoco/tree/main/unitree_robots/g1) | [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-G1-SEED-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) |
| **Kimodo-SMPLX-RP-v1** | [SMPL-X](https://github.com/vchoutas/smplx) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SMPLX-RP-v1) | [NVIDIA R&D Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-internal-scientific-research-and-development-model-license/) |
By default, we recommend using the models trained on the full Bones Rigplay 1 dataset (700 hours of mocap) for your motion generation needs. The models trained on BONES-SEED use only 288 hours of [publicly available mocap data](https://huggingface.co/datasets/bones-studio/seed), so they are less capable, but they are useful for comparison against other models trained on BONES-SEED. To easily compare motion generation models to Kimodo, check out our [Motion Generation Benchmark](#kimodo-motion-generation-benchmark).

### Changes in v1.1

The latest v1.1 Kimodo-SOMA models were released primarily for compatibility with our new [Motion Generation Benchmark](#kimodo-motion-generation-benchmark), but they also contain minor quality improvements over v1. For details on these improvements, please see the Hugging Face pages for [Kimodo-SOMA-RP-v1.1](https://huggingface.co/nvidia/Kimodo-SOMA-RP-v1.1#changes-in-v11) and [Kimodo-SOMA-SEED-v1.1](https://huggingface.co/nvidia/Kimodo-SOMA-SEED-v1.1#changes-in-v11).

## Getting Started

Please see the full documentation for detailed installation instructions, how to use the CLI and Interactive Demo, and other practical tips for generating motions with Kimodo:

**[Full Documentation](https://research.nvidia.com/labs/sil/projects/kimodo/docs)**

- [Quick Start Guide](https://research.nvidia.com/labs/sil/projects/kimodo/docs/getting_started/quick_start.html)
- [Installation Instructions](https://research.nvidia.com/labs/sil/projects/kimodo/docs/getting_started/installation.html)
- [Interactive Motion Authoring Demo](https://research.nvidia.com/labs/sil/projects/kimodo/docs/interactive_demo/index.html)
- [Command-Line Interface](https://research.nvidia.com/labs/sil/projects/kimodo/docs/user_guide/cli.html)
- [Benchmark Instructions](https://research.nvidia.com/labs/sil/projects/kimodo/docs/benchmark/introduction.html)
- [API Reference](https://research.nvidia.com/labs/sil/projects/kimodo/docs/api_reference/index.html)

**Before getting started** with motion generation, please review the [best practices](https://research.nvidia.com/labs/sil/projects/kimodo/docs/key_concepts/limitations.html) and be aware of [model limitations](https://research.nvidia.com/labs/sil/projects/kimodo/docs/key_concepts/limitations.html#limitations).

Some notes on the installation environment:

- Kimodo requires ~17 GB of VRAM to generate locally entirely on the GPU, primarily due to the text embedding model. If you have a smaller card, set `TEXT_ENCODER_DEVICE=cpu` when running Kimodo commands to force text encoding onto the CPU. This is slightly slower but reduces VRAM usage to under 3 GB.
- The model has been most extensively tested on GeForce RTX 3090, GeForce RTX 4090, and NVIDIA A100 GPUs, but it should work on other recent cards with sufficient VRAM.
- This repo was developed on Linux, though Windows should also work, especially when using Docker.
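For example, to run a generation with text encoding forced onto the CPU (a minimal sketch; the prompt and duration here are illustrative):

```
TEXT_ENCODER_DEVICE=cpu kimodo_gen "a person waves with both hands" --duration 5
```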
## Interactive Motion Authoring Demo
**[Demo Documentation and Tutorial](https://research.nvidia.com/labs/sil/projects/kimodo/docs/interactive_demo/index.html)**

The web-based interactive demo provides an intuitive interface for generating motions with any of the Kimodo model variations. After installation, the demo can be launched with the `kimodo_demo` command. It runs locally on http://127.0.0.1:7860. Open this URL in your browser to access the interface (or use port forwarding if set up on a server).

### Demo Features

- **Multiple Characters**: Supports generating with the SOMA, G1, and SMPL-X versions of Kimodo
- **Text Prompts**: Enter one or more natural language descriptions of desired motions on the timeline
- **Timeline Editor**: Add and edit keyframes and constrained intervals on multiple constraint tracks
- **Constraint Types**:
  - Full-Body: Complete joint position constraints at specific frames
  - 2D Root: Define waypoints or full paths to follow on the ground plane
  - End-Effectors: Control hand and foot positions/rotations
- **Constraint Editing**: Editing mode allows for re-posing of constraints or adjusting waypoints
- **3D Visualization**: Real-time rendering of generated motions with skeleton and skinned mesh options
- **Playback Controls**: Preview generated motions with adjustable playback speed
- **Multiple Samples**: Generate and compare multiple motion variations
- **Examples**: Load pre-existing examples to better understand Kimodo's capabilities
- **Export**: Save constraints and generated motions for later use

## Command-Line Interface

**[CLI Documentation and Examples](https://research.nvidia.com/labs/sil/projects/kimodo/docs/user_guide/cli.html)**

Motions can also be generated directly from the command line with the `kimodo_gen` command, or by running `python -m kimodo.scripts.generate`.

**Key Arguments:**

- `prompt`: A single text description or sequence of texts for the desired motion (required)
- `--model`: Which Kimodo model to use for generation
- `--duration`: Motion duration in seconds
- `--num_samples`: Number of motion variations to generate
- `--constraints`: Constraint file to control the generated motion (e.g., saved from the web demo)
- `--diffusion_steps`: Number of denoising steps
- `--cfg_type` / `--cfg_weight`: Classifier-free guidance (`nocfg`, `regular` with one weight, or `separated` with two weights for text vs. constraints); see the [CLI docs](https://research.nvidia.com/labs/sil/projects/kimodo/docs/user_guide/cli.html#classifier-free-guidance-cfg)
- `--no-postprocess`: Flag to disable foot skate and constraint cleanup post-processing
- `--seed`: Random seed for reproducible results

The script supports different output formats depending on which skeleton is used. By default, a custom NPZ format is saved that is compatible with the web demo. For Kimodo-G1 models, the motion can be saved in the standard MuJoCo qpos CSV format. For Kimodo-SMPLX, motion can be saved in the standard AMASS NPZ format for compatibility with existing pipelines.
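As an illustrative example combining several of these arguments (the prompt and values are placeholders; see the CLI docs for available model identifiers):

```
kimodo_gen "a person walks forward, then sits down" --duration 8 --num_samples 2 --seed 0
```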
### Default NPZ Output Format

Generated motions are saved as NPZ files containing:

- `posed_joints`: Global joint positions `[T, J, 3]`
- `global_rot_mats`: Global joint rotation matrices `[T, J, 3, 3]`
- `local_rot_mats`: Local (parent-relative) joint rotation matrices `[T, J, 3, 3]`
- `foot_contacts`: Foot contact labels [left heel, left toe, right heel, right toe] `[T, 4]`
- `smooth_root_pos`: Smoothed root positions output by the model `[T, 3]`
- `root_positions`: The (non-smoothed) trajectory of the actual root joint (e.g., pelvis) `[T, 3]`
- `global_root_heading`: The heading direction output from the model `[T, 2]`

Here `T` is the number of frames and `J` is the number of joints.
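As a minimal sketch of consuming this format in Python (the file path is an assumption; the keys are those listed above):

```python
import numpy as np

# Load a motion saved by kimodo_gen (the path is illustrative)
data = np.load("motion.npz")

posed_joints = data["posed_joints"]    # [T, J, 3] global joint positions
foot_contacts = data["foot_contacts"]  # [T, 4] per-frame contact labels

T, J, _ = posed_joints.shape
print(f"{T} frames, {J} joints")
# Assumes contact labels can be thresholded at 0.5 (binary or probability-like)
print(f"frames with any foot contact: {(foot_contacts.max(axis=1) > 0.5).mean():.1%}")
```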
## Low-Level Python API

**[Model API Documentation](https://research.nvidia.com/labs/sil/projects/kimodo/docs/api_reference/model.html#kimodo.model.kimodo_model.Kimodo.__call__)**

For maximum flexibility, the low-level model inference API can be called directly, rather than going through our high-level CLI. This allows for advanced model configuration, including classifier-free guidance weights and parameters related to transitions in multi-prompt sequences.

## Downstream Robotics Applications of Kimodo

### Visualizing G1 Motions with MuJoCo

After generating motions on the G1 robot skeleton and saving them in the MuJoCo qpos CSV format, they can easily be used and visualized within MuJoCo. A minimal visualization script is available with:

```
python -m kimodo.scripts.mujoco_load
```

Make sure to edit the script to correctly point to your CSV file, and install MuJoCo before running it.

### Tracking Generated Motions with ProtoMotions
[ProtoMotions](https://github.com/NVlabs/ProtoMotions) is a GPU-accelerated simulation and learning framework for training physically simulated digital humans and humanoid robots. The Kimodo NPZ and CSV output formats are both compatible with ProtoMotions, making it easy to train physics-based policies with motions generated by Kimodo. ProtoMotions supports outputs on both the SOMA skeleton and the Unitree G1.

After generating motions with Kimodo, head over to the [ProtoMotions docs](https://github.com/NVlabs/ProtoMotions?tab=readme-ov-file#-motion-authoring-with-kimodo) to see how to import them.

### Retargeting Motions to Other Robots with GMR
Motions generated by Kimodo-SMPLX can be retargeted to other robots using [General Motion Retargeting (GMR)](https://github.com/YanjieZe/GMR). GMR supports the AMASS NPZ format out of the box, so simply generate motions with Kimodo and use `--output` to save; the AMASS NPZ is written to `stem_amass.npz` (single sample) or in the output folder (multiple samples). Then, use the [SMPL-X to Robot script](https://github.com/YanjieZe/GMR?tab=readme-ov-file#retargeting-from-smpl-x-amass-omomo-to-robot) in GMR to retarget to any supported robot. For example:

```
# run within GMR codebase
python scripts/smplx_to_robot.py --smplx_file /path/to/saved/amass_format.npz --robot booster_t1
```

### Combining Kimodo with GEAR-SONIC
As a proof of concept, we have also incorporated Kimodo into the [interactive GEAR-SONIC demo](https://nvlabs.github.io/GEAR-SONIC/demo.html). In the demo, Kimodo can be used to generate a kinematic motion on the G1 robot skeleton, and GEAR-SONIC then tracks the motion in simulation.

## Kimodo Motion Generation Benchmark

[**[Benchmark Documentation](https://research.nvidia.com/labs/sil/projects/kimodo/docs/benchmark/introduction.html)**] [**[Test Suite on Hugging Face](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark)**]

Alongside the Kimodo models, we provide a benchmark designed to standardize evaluation of motion generation models with a comprehensive set of test cases. This includes:

* **Evaluation Data**: A suite of test cases [available on Hugging Face](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark) is used in concert with the [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) dataset to construct the full benchmark.
* **Diverse Test Cases**: Test cases cover a wide range of text-conditioned and constraint-conditioned motion generation.
* **Evaluation Pipeline**: Code for the full evaluation pipeline, including benchmark construction, motion generation, and evaluation.
* **Metrics**: Several metrics to evaluate generated motions, covering motion quality, constraint following, and text alignment. Our [TMR-SOMA-RP-v1](https://huggingface.co/nvidia/TMR-SOMA-RP-v1) model, trained on all 700 hours of the Bones Rigplay dataset, is a powerful embedding model for computing common metrics like R-precision and FID.

To facilitate future research, we [report benchmark results](https://research.nvidia.com/labs/sil/projects/kimodo/docs/benchmark/results.html) for the Kimodo-SOMA-v1.1 models, which are reproducible and easily comparable to other methods trained on the BONES-SEED data.
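The scripts under `benchmark/` implement the numbered pipeline steps. An illustrative end-to-end run might look like the following (the testsuite path is an assumption, and we assume `generate_eval.py` is the generation step; see the benchmark documentation for the exact commands):

```
python benchmark/create_benchmark.py my_testsuite --dataset datasets/bones-seed/soma_uniform  # (1) build test cases
python benchmark/generate_eval.py ...                                                         # (2) generate motions per test case
python benchmark/embed_folder.py my_testsuite --model tmr-soma-rp                             # (3) embed motions and text with TMR
python benchmark/evaluate_folder.py my_testsuite                                              # (4) compute and aggregate metrics
```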
## Timeline Annotations for BONES-SEED

As detailed in the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf), Kimodo is trained using fine-grained temporal text annotations of mocap clips. While the full [Rigplay 1](https://bones.studio/datasets#rp01) dataset is proprietary, we have released the temporal segmentations for the public [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) subset. These annotations are already included in the BONES-SEED dataset, but the standalone labels and additional information about them are [available on HuggingFace](https://huggingface.co/datasets/nvidia/SEED-Timeline-Annotations).

## Related Humanoid Work at NVIDIA

Kimodo is part of a larger effort to enable humanoid motion data for robotics, physical AI, and other applications. Check out these related works:

* [SOMA Body Model](https://github.com/NVlabs/SOMA-X) - a unified parametric human body model
* [BONES-SEED Dataset](https://huggingface.co/datasets/bones-studio/seed) - a large-scale human(oid) motion capture dataset in SOMA and G1 format
* [ProtoMotions](https://github.com/NVlabs/ProtoMotions) - simulation and learning framework for training physically simulated human(oid)s
* [SOMA Retargeter](https://github.com/NVIDIA/soma-retargeter) - SOMA to G1 retargeting tool
* [GEM](https://github.com/NVlabs/GEM-X) - human motion reconstruction from video
* [GEAR SONIC](https://github.com/NVlabs/GR00T-WholeBodyControl) - humanoid behavior foundation model for physical robots

## Citation

If you use this code in your research, please cite:

```bibtex
@article{Kimodo2026,
  title={Kimodo: Scaling Controllable Human Motion Generation},
  author={Rempe, Davis and Petrovich, Mathis and Yuan, Ye and Zhang, Haotian and Peng, Xue Bin and Jiang, Yifeng and Wang, Tingwu and Iqbal, Umar and Minor, David and de Ruyter, Michael and Li, Jiefeng and Tessler, Chen and Lim, Edy and Jeong, Eugene and Wu, Sam and Hassani, Ehsan and Huang, Michael and Yu, Jin-Bey and Chung, Chaeyeon and Song, Lina and Dionne, Olivier and Kautz, Jan and Yuen, Simon and Fidler, Sanja},
  journal={arXiv:2603.15546},
  year={2026}
}
```

## License

This codebase is licensed under [Apache-2.0](LICENSE). Note that model checkpoints and data are licensed separately, as indicated on the HuggingFace download pages.

This project will download and install additional third-party open source software projects. Review the license terms of these open source projects before use.

## Acknowledgments

This project builds upon excellent open-source projects:

- [Viser](https://github.com/nerfstudio-project/viser) for the 3D motion authoring demo
- [LLM2Vec](https://github.com/McGill-NLP/llm2vec) for text encoding

## Contact

For questions or issues, please open an issue on this repository or reach out directly to the authors.

---

================================================
FILE: benchmark/create_benchmark.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Step (1) of evaluation pipeline.

This script builds the benchmark test suites from BVH motions in the Bones-SEED dataset
using the benchmark metadata. Currently it is only set up for the SOMA skeleton.
"""
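# Illustrative invocation (the testsuite path is an assumption; the flags are those
# defined in main() below):
#   python benchmark/create_benchmark.py my_testsuite --dataset datasets/bones-seed/soma_uniform --workers 8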
""" import argparse from functools import partial from multiprocessing import Pool from pathlib import Path import numpy as np import torch from tqdm import tqdm from kimodo.geometry import matrix_to_axis_angle from kimodo.motion_rep import KimodoMotionRep from kimodo.skeleton import SOMASkeleton77 from kimodo.skeleton.bvh import parse_bvh_motion from kimodo.tools import load_json, save_json, to_numpy, to_torch FPS = 30 BENCHMARK_REPO_ID = "nvidia/Kimodo-Motion-Gen-Benchmark" def download_benchmark(dest: Path) -> Path: """Download the benchmark testsuite from HuggingFace to *dest*.""" from huggingface_hub import snapshot_download print(f"Downloading benchmark testsuite from {BENCHMARK_REPO_ID} to {dest} ...") snapshot_dir = snapshot_download( repo_id=BENCHMARK_REPO_ID, repo_type="dataset", local_dir=str(dest), ) return Path(snapshot_dir) def discover_seed_motion_folders(root: Path) -> list[Path]: """Find all directories under root that contain seed_motion.json; return sorted list of those dirs.""" root = root.resolve() if not root.is_dir(): raise FileNotFoundError(f"Folder does not exist: {root}") out: list[Path] = [] for meta_path in root.rglob("seed_motion.json"): src_dir = meta_path.parent out.append(src_dir) return sorted(out) def constraints_and_motion_from_seed(folder: str, dataset_folder: str, fps=FPS): """Load seed_motion.json and BVH from folder; subsample to fps, convert to SOMA gt_motion.npz and constraints.""" folder = Path(folder) dataset_folder = Path(dataset_folder) out_path = folder / "gt_motion.npz" seed_motion = load_json(folder / "seed_motion.json") start = seed_motion["crop_start_frame_index"] end = seed_motion["crop_end_frame_index"] bvh_path = dataset_folder / seed_motion["bvh_path"].replace("BVH/", "bvh/") local_rot_mats, root_trans, bvh_fps = parse_bvh_motion(bvh_path) step = round(bvh_fps / fps) # Subsample fps root_trans = root_trans[::step] local_rot_mats = local_rot_mats[::step] skeleton = SOMASkeleton77() # Changing t_pose: essential step local_rot_mats, global_rot_mats = skeleton.to_standard_tpose(local_rot_mats) # Use the motion rep to canonicalize the motion (start z+ at 0,0) # and get other components (smooth root, foot contacts etc) motion_rep = KimodoMotionRep(skeleton, fps) feats = motion_rep(local_rot_mats, root_trans, to_normalize=False) # Crop the features and canonicalizing them feats = feats[start:end] can_feats = motion_rep.canonicalize(feats) # Get back the motion motion = motion_rep.inverse(can_feats, is_normalized=False) motion = to_numpy(to_torch(motion, dtype=torch.float32)) np.savez(out_path, **motion) seed_constraints_path = folder / "seed_constraints.json" if seed_constraints_path.exists(): seed_constraints_lst = load_json(seed_constraints_path) constraints_lst = [] for seed_cons in seed_constraints_lst: cons = seed_cons.copy() frame_indices = cons["frame_indices"] cons["smooth_root_2d"] = motion["smooth_root_pos"][frame_indices][..., [0, 2]].tolist() if cons["type"] == "root2d": if cons.get("use_global_orient", False): cons["global_root_heading"] = motion["global_root_heading"][ # noqa frame_indices ].tolist() elif cons["type"] in ["fullbody"] or cons["type"] in [ "left-hand", "right-hand", "left-foot", "right-foot", "end-effector", ]: cons["local_joints_rot"] = matrix_to_axis_angle( to_torch(motion["local_rot_mats"][frame_indices]) ).tolist() cons["root_positions"] = motion["root_positions"][frame_indices].tolist() else: raise TypeError(f"This constraint type is not recognized: {cons['type']}") constraints_lst.append(cons) # check that it 
def main():
    parser = argparse.ArgumentParser(
        description="Recursively find test cases to fill with motions and constraints.",
    )
    parser.add_argument(
        "benchmark",
        type=Path,
        help="Root folder to search recursively for seed_motion.json, or the destination to download the benchmark testsuite from HuggingFace to.",
    )
    parser.add_argument(
        "--dataset",
        type=Path,
        default="datasets/bones-seed/soma_uniform",
        help="SEED dataset folder",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="Redo the process even if gt_motion.npz already exists",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=1,
        help="Number of parallel worker processes (default: 1, sequential)",
    )
    args = parser.parse_args()

    folder = args.benchmark.resolve()
    if not folder.is_dir():
        print(f"Benchmark folder not found at {folder}, downloading from HuggingFace...")
        download_benchmark(folder)

    dirs = discover_seed_motion_folders(folder)
    if not dirs:
        raise SystemExit(f"No directories with seed_motion.json found under {folder}")
    print(f"Discovered {len(dirs)} motions to populate.")

    skipped = 0
    to_process = []
    for d in dirs:
        if not args.overwrite and (d / "gt_motion.npz").is_file():
            skipped += 1
        else:
            to_process.append(d)

    fn = partial(constraints_and_motion_from_seed, dataset_folder=args.dataset)
    with Pool(args.workers) as pool:
        list(tqdm(pool.imap_unordered(fn, to_process), total=len(to_process), desc="Extracting GT motions"))

    if skipped:
        print(f"Processed {len(dirs) - skipped} folders, skipped {skipped} (already present).")
    else:
        print("Saved gt_motion.npz and constraints.json from the seed files.")


if __name__ == "__main__":
    main()

================================================
FILE: benchmark/embed_folder.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Step (3) of evaluation pipeline.

This script recursively embeds generated motions, ground-truth motions, and text prompts
from a test suite folder tree with the pre-trained TMR model.
"""
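# Illustrative invocation (the testsuite path is an assumption; the flags are those
# defined in main() below):
#   python benchmark/embed_folder.py my_testsuite --model tmr-soma-rp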
import argparse
from pathlib import Path

import numpy as np
import torch
from tqdm import tqdm

from kimodo.meta import parse_prompts_from_meta
from kimodo.model.load_model import load_model
from kimodo.tools import load_json


def discover_motion_folders(root: Path) -> list[Path]:
    root = root.resolve()
    if not root.is_dir():
        raise FileNotFoundError(f"Folder does not exist: {root}")
    out: list[Path] = []
    for meta_path in root.rglob("meta.json"):
        src_dir = meta_path.parent
        if (src_dir / "motion.npz").is_file() or (src_dir / "gt_motion.npz").is_file():
            out.append(src_dir)
    return sorted(out)


def _load_posed_joints(npz_path: Path, device: str) -> torch.Tensor:
    data = np.load(npz_path)
    if "posed_joints" not in data:
        raise SystemExit(f"NPZ must contain 'posed_joints': {npz_path}")
    posed_joints = data["posed_joints"]
    if posed_joints.ndim == 4:
        if posed_joints.shape[0] != 1:
            raise SystemExit(f"Expected batch size 1 for posed_joints, got {posed_joints.shape[0]} in {npz_path}")
        posed_joints = posed_joints[0]
    if posed_joints.ndim != 3:
        raise SystemExit(f"Expected posed_joints shape [T, J, 3], got {posed_joints.shape} in {npz_path}")
    return torch.from_numpy(posed_joints).float().to(device)


def main():
    parser = argparse.ArgumentParser(
        description="Recursively embed motion, gt_motion, and text; save motion_embedding.npy, gt_motion_embedding.npy, and text_embedding.npy when present.",
    )
    parser.add_argument(
        "folder",
        type=Path,
        help="Root folder to search recursively for meta.json and motion.npz and/or gt_motion.npz",
    )
    parser.add_argument(
        "--model",
        default="tmr-soma-rp",
        help="Model for encoding (e.g. TMR-SOMA-RP-v1, tmr-soma-rp). Default: tmr-soma-rp",
    )
    parser.add_argument(
        "--device",
        default=None,
        help="Device (default: cuda if available else cpu)",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="Re-embed even if embedding files already exist",
    )
    parser.add_argument(
        "--text_encoder_fp32",
        action="store_true",
        help="Uses fp32 for the text encoder rather than the default bfloat16.",
    )
    args = parser.parse_args()

    folder = args.folder.resolve()
    if not folder.is_dir():
        raise SystemExit(f"Folder does not exist or is not a directory: {folder}")

    device = args.device or ("cuda" if torch.cuda.is_available() else "cpu")
    model = load_model(modelname=args.model, device=device, default_family="TMR", text_encoder_fp32=args.text_encoder_fp32)

    dirs = discover_motion_folders(folder)
    if not dirs:
        raise SystemExit(f"No directories with meta.json and (motion.npz or gt_motion.npz) found under {folder}")
    print(f"Discovered {len(dirs)} motion folders.")

    skipped_motion = 0
    skipped_gt = 0
    skipped_text = 0
    for sample_dir in tqdm(dirs, desc="Embedding"):
        meta_path = sample_dir / "meta.json"
        meta = load_json(meta_path)
        texts, _ = parse_prompts_from_meta(meta)
        if len(texts) != 1:
            raise SystemExit(f"Expected exactly one text per motion; got {len(texts)} in {meta_path}")
        text = texts[0]

        # Embed motion.npz -> motion_embedding.npy
        if (sample_dir / "motion.npz").is_file():
            if not args.overwrite and (sample_dir / "motion_embedding.npy").is_file():
                skipped_motion += 1
            else:
                npz_path = sample_dir / "motion.npz"
                posed_joints = _load_posed_joints(npz_path, device)
                with torch.inference_mode():
                    motion_emb = model.encode_motion(posed_joints, unit_vector=True)
                np.save(sample_dir / "motion_embedding.npy", motion_emb.cpu().numpy())

        # Embed gt_motion.npz -> gt_motion_embedding.npy
        if (sample_dir / "gt_motion.npz").is_file():
            if not args.overwrite and (sample_dir / "gt_motion_embedding.npy").is_file():
                skipped_gt += 1
            else:
npz_path = sample_dir / "gt_motion.npz" posed_joints = _load_posed_joints(npz_path, device) with torch.inference_mode(): gt_motion_emb = model.encode_motion(posed_joints, unit_vector=True) np.save(sample_dir / "gt_motion_embedding.npy", gt_motion_emb.cpu().numpy()) # Embed text -> text_embedding.npy if not args.overwrite and (sample_dir / "text_embedding.npy").is_file(): skipped_text += 1 else: with torch.inference_mode(): text_emb = model.encode_raw_text([text], unit_vector=True) np.save(sample_dir / "text_embedding.npy", text_emb.cpu().numpy()) total_skipped = skipped_motion + skipped_gt + skipped_text if total_skipped: print(f"Embedded {len(dirs)} folders; skipped {total_skipped} existing embedding files (use --overwrite to re-embed).") else: print(f"Saved motion_embedding.npy, gt_motion_embedding.npy, and text_embedding.npy in {len(dirs)} folders.") if __name__ == "__main__": main() ================================================ FILE: benchmark/evaluate_folder.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """ Step (4) of evaluation pipeline. This script recursively computes metrics for generated and ground-truth motions within a test suite folder tree. Saves metrics json files per test case and per group of test cases in the folder tree. """ import argparse import json from itertools import groupby from pathlib import Path from typing import Any import numpy as np import torch from tqdm import tqdm from kimodo.constraints import load_constraints_lst from kimodo.meta import parse_prompts_from_meta from kimodo.metrics import ( ContraintFollow, FootContactConsistency, FootSkateFromContacts, FootSkateFromHeight, FootSkateRatio, TMR_EmbeddingMetric, aggregate_metrics, clear_metrics, compute_metrics, compute_tmr_per_sample_retrieval, ) from kimodo.skeleton import build_skeleton from kimodo.skeleton.definitions import SOMASkeleton30 from kimodo.tools import load_json, to_torch DEFAULT_FPS = 30.0 def discover_motion_folders(root: Path) -> list[tuple[Path, Path]]: root = root.resolve() if not root.is_dir(): raise FileNotFoundError(f"Folder does not exist: {root}") out: list[tuple[Path, Path]] = [] for meta_path in root.rglob("meta.json"): sample_dir = meta_path.parent if (sample_dir / "motion.npz").is_file() and (sample_dir / "gt_motion.npz").is_file(): rel = sample_dir.relative_to(root) out.append((sample_dir, rel)) return sorted(out, key=lambda x: str(x[1])) def group_by_parent(examples: list[tuple[Path, Path]]) -> list[list[tuple[Path, Path]]]: def parent_key(item: tuple[Path, Path]) -> Path: return item[1].parent if len(item[1].parts) > 1 else Path(".") sorted_examples = sorted(examples, key=parent_key) groups: list[list[tuple[Path, Path]]] = [] for _key, group in groupby(sorted_examples, key=parent_key): groups.append(list(group)) return groups def _to_scalar(t: torch.Tensor) -> float: # .item() on an empty tensor raises, so report NaN for empty inputs instead. return float(t.mean().item()) if t.numel() > 0 else float("nan") def _to_p95(t: torch.Tensor) -> float: if t.numel() == 0: return float("nan") return float(torch.nanquantile(t, torch.tensor(0.95, device=t.device), dim=0).item()) def _per_sample_metrics_from_saved(metrics_list: list, n: int) -> list[dict[str, float]]: per_sample: list[dict[str, float]] = [{} for _ in range(n)] for metric in metrics_list: for key, lst in metric.saved_metrics.items(): for i, t in enumerate(lst): if i >= n: break per_sample[i][key] = _to_scalar(t) return per_sample def _load_pair_embeddings( sample_dir:
Path, ) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None: motion_emb_path = sample_dir / "motion_embedding.npy" text_emb_path = sample_dir / "text_embedding.npy" gt_motion_emb_path = sample_dir / "gt_motion_embedding.npy" if not (motion_emb_path.is_file() and text_emb_path.is_file()): return None motion_emb = np.load(motion_emb_path) text_emb = np.load(text_emb_path) if motion_emb.ndim == 3 and motion_emb.shape[0] == 1: motion_emb = motion_emb[0] if text_emb.ndim == 3 and text_emb.shape[0] == 1: text_emb = text_emb[0] gt_motion_emb = None if gt_motion_emb_path.is_file(): gt_motion_emb = np.load(gt_motion_emb_path) if gt_motion_emb.ndim == 3 and gt_motion_emb.shape[0] == 1: gt_motion_emb = gt_motion_emb[0] return motion_emb, text_emb, gt_motion_emb def _load_npz_motion( npz_path: Path, device: str, soma30_skel: SOMASkeleton30 | None = None, ) -> tuple[torch.Tensor, torch.Tensor]: """Load posed_joints and foot_contacts from an NPZ, upscaling SOMA30 to SOMA77 if needed.""" data = np.load(npz_path) posed_joints = to_torch(data["posed_joints"], device=device) foot_contacts = to_torch(data["foot_contacts"], device=device) if posed_joints.shape[-2] == 30 and soma30_skel is not None: local_rot_mats = to_torch(data["local_rot_mats"], device=device) root_positions = to_torch(data["root_positions"], device=device) out77 = soma30_skel.output_to_SOMASkeleton77( {"local_rot_mats": local_rot_mats, "root_positions": root_positions, "foot_contacts": foot_contacts} ) posed_joints = out77["posed_joints"] foot_contacts = out77["foot_contacts"] return posed_joints, foot_contacts def _run_eval_on_group( group: list[tuple[Path, Path]], skeleton: torch.nn.Module, metrics_list: list, device: str, group_name: str = "", soma30_skel: SOMASkeleton30 | None = None, ) -> tuple[ list[dict[str, float]], list[dict[str, float]], dict[str, float], dict[str, float], dict[str, float], list[dict[str, Any]], ]: """Run two passes: gen (motion.npz + embeddings) and GT (gt_motion.npz only). Return per_sample_gen, per_sample_gt, aggregated_gen, aggregated_gt, tmr_metrics, tmr_per_sample. 
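For example (keys taken from the metrics below, values illustrative): aggregated_gen may look like {"foot_skate_from_pred_contacts": 0.031, "constraint_root2d_err": 0.012, "constraint_root2d_err_p95": 0.034}; tmr_metrics collects only the "TMR/"-prefixed aggregates, and tmr_per_sample holds one retrieval record per sample with "rank" and "top_k" entries.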
""" n = len(group) sample_ids: list[str] = [] texts: list[str] = [] motion_embs: list[np.ndarray] = [] text_embs: list[np.ndarray] = [] # ----- Pass 1: generation (motion.npz + all embeddings) ----- clear_metrics(metrics_list) desc = f"Samples ({group_name})" if group_name else "Samples" for sample_dir, rel_path in tqdm(group, desc=desc, unit="motion"): stem = rel_path.name sample_ids.append(stem) meta_path = sample_dir / "meta.json" meta = load_json(meta_path) texts_parsed, _ = parse_prompts_from_meta(meta) texts.append(texts_parsed[0] if texts_parsed else "") posed_joints, foot_contacts = _load_npz_motion(sample_dir / "motion.npz", device, soma30_skel) nframes = posed_joints.shape[0] lengths = torch.tensor(nframes, dtype=torch.long, device=device) constraints_path = sample_dir / "constraints.json" constraints_lst = ( load_constraints_lst(str(constraints_path), skeleton=skeleton) if constraints_path.is_file() else [] ) metrics_in: dict[str, Any] = { "posed_joints": posed_joints, "foot_contacts": foot_contacts, "lengths": lengths, "constraints_lst": constraints_lst, } text_this = texts_parsed[0] if texts_parsed else "" embs = _load_pair_embeddings(sample_dir) if (text_this or "").strip() and embs is not None: motion_emb, text_emb, gt_motion_emb = embs metrics_in["motion_emb"] = motion_emb metrics_in["text_emb"] = text_emb if gt_motion_emb is not None: metrics_in["gt_motion_emb"] = gt_motion_emb motion_embs.append(motion_emb) text_embs.append(text_emb) compute_metrics(metrics_list, metrics_in) per_sample_gen = _per_sample_metrics_from_saved(metrics_list, n) raw_aggregated_gen = aggregate_metrics(metrics_list) aggregated_gen = {} tmr_metrics: dict[str, float] = {} has_text = len(motion_embs) == n and len(text_embs) == n for key, v in raw_aggregated_gen.items(): val = _to_scalar(v) if key.startswith("TMR/"): if has_text: tmr_metrics[key] = val else: aggregated_gen[key] = val if "constraint_root2d_err" in raw_aggregated_gen: aggregated_gen["constraint_root2d_err_p95"] = _to_p95(raw_aggregated_gen["constraint_root2d_err"]) tmr_per_sample: list[dict[str, Any]] = [] if has_text and motion_embs and text_embs and len(motion_embs) == n and len(text_embs) == n: motion_emb_stack = np.stack(motion_embs, axis=0) text_emb_stack = np.stack(text_embs, axis=0) tmr_per_sample = compute_tmr_per_sample_retrieval(motion_emb_stack, text_emb_stack, sample_ids, texts, top_k=5) # ----- Pass 2: GT (gt_motion.npz only, no embeddings) ----- clear_metrics(metrics_list) for sample_dir, rel_path in tqdm(group, desc=f"GT ({group_name})" if group_name else "GT", unit="motion"): posed_joints, foot_contacts = _load_npz_motion(sample_dir / "gt_motion.npz", device, soma30_skel) nframes = posed_joints.shape[0] lengths = torch.tensor(nframes, dtype=torch.long, device=device) constraints_path = sample_dir / "constraints.json" constraints_lst = ( load_constraints_lst(str(constraints_path), skeleton=skeleton) if constraints_path.is_file() else [] ) metrics_in = { "posed_joints": posed_joints, "foot_contacts": foot_contacts, "lengths": lengths, "constraints_lst": constraints_lst, } compute_metrics(metrics_list, metrics_in) per_sample_gt = _per_sample_metrics_from_saved(metrics_list, n) raw_aggregated_gt = aggregate_metrics(metrics_list) aggregated_gt = {} for key, v in raw_aggregated_gt.items(): if key.startswith("TMR/"): continue aggregated_gt[key] = _to_scalar(v) if "constraint_root2d_err" in raw_aggregated_gt: aggregated_gt["constraint_root2d_err_p95"] = _to_p95(raw_aggregated_gt["constraint_root2d_err"]) return ( per_sample_gen, 
per_sample_gt, aggregated_gen, aggregated_gt, tmr_metrics, tmr_per_sample, ) def _write_json(path: Path, payload: dict[str, Any]) -> None: path.parent.mkdir(parents=True, exist_ok=True) path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") def main(): parser = argparse.ArgumentParser( description="Recursively evaluate generated motions; write metrics.json per folder and <folder_name>.json per parent.", ) parser.add_argument( "folder", type=Path, help="Root folder to search recursively for meta.json + motion.npz + gt_motion.npz", ) parser.add_argument("--device", default=None, help="cuda/cpu. Default: auto") args = parser.parse_args() folder = args.folder.resolve() if not folder.is_dir(): raise SystemExit(f"Folder does not exist: {folder}") device = args.device or ("cuda" if torch.cuda.is_available() else "cpu") examples = discover_motion_folders(folder) if not examples: raise SystemExit(f"No directories with meta.json, motion.npz, and gt_motion.npz found under {folder}") print(f"Discovered {len(examples)} motion folders.") first_posed = np.load(examples[0][0] / "motion.npz")["posed_joints"] num_joints = first_posed.shape[-2] # SOMA models could generate 30-joint output; upscale to 77 for evaluation soma30_skel: SOMASkeleton30 | None = None if num_joints == 30: soma30_skel = SOMASkeleton30().to(device) _ = soma30_skel.somaskel77 # trigger lazy init soma30_skel.somaskel77.to(device) skeleton = soma30_skel.somaskel77 print("Detected SOMA30 motions; will upscale to SOMA77 for evaluation.") else: skeleton = build_skeleton(num_joints).to(device) fps = DEFAULT_FPS kwargs = {"skeleton": skeleton, "fps": fps} metrics_list = [ FootSkateFromHeight(**kwargs), FootSkateFromContacts(**kwargs), FootContactConsistency(**kwargs), FootSkateRatio(**kwargs), ContraintFollow(**kwargs), TMR_EmbeddingMetric(**kwargs), ] groups = group_by_parent(examples) for group in tqdm(groups, desc="Evaluating folders"): sample_dirs = [g[0] for g in group] folder_for_group = sample_dirs[0].parent folder_name = folder_for_group.name ( per_sample_gen, per_sample_gt, aggregated_gen, aggregated_gt, tmr_metrics, tmr_per_sample, ) = _run_eval_on_group(group, skeleton, metrics_list, device, group_name=folder_name, soma30_skel=soma30_skel) texts = [] for sample_dir, _ in group: meta = load_json(sample_dir / "meta.json") texts_parsed, _ = parse_prompts_from_meta(meta) texts.append(texts_parsed[0] if texts_parsed else "") for i, (sample_dir, _) in enumerate(group): metrics_path = sample_dir / "metrics.json" out = { "num_motions": 1, "folder": str(sample_dir), "per_motion_mean_gen": per_sample_gen[i] if i < len(per_sample_gen) else {}, "per_motion_mean_gt": per_sample_gt[i] if i < len(per_sample_gt) else {}, } if i < len(tmr_per_sample): out["tmr"] = { "t2m_rank": tmr_per_sample[i]["rank"], "text": texts[i] if i < len(texts) else "", "top5_retrieved": tmr_per_sample[i]["top_k"], } _write_json(metrics_path, out) parent_json_path = folder_for_group.parent / f"{folder_name}.json" full_metrics = { "num_motions": len(group), "folder": str(folder_for_group), "per_motion_mean_gen": aggregated_gen, "per_motion_mean_gt": aggregated_gt, } if tmr_metrics: full_metrics["tmr"] = tmr_metrics _write_json(parent_json_path, full_metrics) print(f"Wrote metrics.json in each of {len(examples)} folders and folder-level JSONs for {len(groups)} groups.") if __name__ == "__main__": main() ================================================ FILE: benchmark/generate_eval.py ================================================ # SPDX-FileCopyrightText: Copyright
(c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """ Step (2) of evaluation pipeline. This script recursively generates motions using Kimodo from a test suite folder tree. """ import argparse import shutil from pathlib import Path from typing import Any import numpy as np import torch from torch.utils.data import DataLoader, Dataset from tqdm.auto import tqdm from kimodo.constraints import load_constraints_lst from kimodo.meta import parse_prompts_from_meta from kimodo.model import DEFAULT_MODEL, load_model from kimodo.tools import load_json, seed_everything def parse_args(): parser = argparse.ArgumentParser(description="Recursively generate motions from a testsuite folder tree") parser.add_argument( "--benchmark", type=str, default="testsuite", help="Root folder containing subfolders with meta.json (default: testsuite)", ) parser.add_argument( "--output", type=str, default=None, help="Output root; directory hierarchy is mirrored here. If omitted, motions are generated in-place inside the testsuite folder.", ) parser.add_argument( "--batch_size", type=int, default=32, help="Batch size for generating motions (default: 32)", ) parser.add_argument( "--num_workers", type=int, default=4, help="DataLoader workers for loading meta/constraints paths (default: 4)", ) parser.add_argument( "--model", type=str, default=DEFAULT_MODEL, help="Name of the model (e.g. Kimodo-SOMA-RP-v1.1, kimodo-soma-rp, or SOMA).", ) parser.add_argument( "--diffusion_steps", type=int, default=100, help="Number of diffusion steps (default: 100); overridden by meta.json if present", ) parser.add_argument( "--postprocess", action="store_true", help="Apply motion post-processing to reduce foot skating", ) parser.add_argument( "--overwrite", action="store_true", help="Regenerate outputs even if motion.npz already exists", ) parser.add_argument( "--text_encoder_fp32", action="store_true", help="Uses fp32 for instantiating the text encoder (if API is not already running) rather than default bfloat16.", ) return parser.parse_args() def discover_example_folders(root: Path) -> list[tuple[Path, Path]]: """Discover leaf directories that contain meta.json. Returns list of (src_dir, rel_path). """ root = root.resolve() if not root.is_dir(): raise FileNotFoundError(f"Testsuite folder does not exist: {root}") out: list[tuple[Path, Path]] = [] for meta_path in root.rglob("meta.json"): src_dir = meta_path.parent rel = src_dir.relative_to(root) out.append((src_dir, rel)) return sorted(out, key=lambda x: str(x[1])) def copy_source_files(src_dir: Path, out_dir: Path) -> None: """Copy meta.json, constraints.json, and gt_motion.npz (if present) from src_dir to out_dir.""" out_dir.mkdir(parents=True, exist_ok=True) for name in ("meta.json", "constraints.json", "gt_motion.npz"): src_file = src_dir / name if src_file.is_file(): shutil.copy2(src_file, out_dir / name) class EvalExampleDataset(Dataset): """Dataset of example folders: yields text, num_frame, constraints_path (and paths, meta). No torch/skeleton in workers so num_workers > 0 is safe with CUDA. 
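A returned item is a plain dict, e.g. (values illustrative): {"rel_path": Path("content/text2motion/overview/001"), "text": "a person walks forward", "num_frame": 120, "constraints_path": None, ...}, plus "src_dir", "out_dir", and "meta" entries.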
""" def __init__( self, examples: list[tuple[Path, Path]], testsuite_root: Path, generated_root: Path, fps: float, ): self.examples = examples self.testsuite_root = testsuite_root self.generated_root = generated_root self.fps = fps def __len__(self) -> int: return len(self.examples) def __getitem__(self, idx: int) -> dict[str, Any]: src_dir, rel_path = self.examples[idx] out_dir = self.generated_root / rel_path meta_path = src_dir / "meta.json" meta = load_json(str(meta_path)) assert meta.get("num_samples", 1) == 1, "Expected num_samples to be absent or 1 in meta.json" texts, durations_sec = parse_prompts_from_meta(meta) assert len(texts) == 1, "Expected exactly one prompt (len(texts)==1) per example" num_frames = [int(float(d) * self.fps) for d in durations_sec] assert len(num_frames) == 1, "Expected exactly one duration per example" constraints_path = src_dir / "constraints.json" cpath = str(constraints_path) if constraints_path.is_file() else None return { "rel_path": rel_path, "src_dir": str(src_dir), "out_dir": str(out_dir), "meta": meta, "text": texts[0], "num_frame": num_frames[0], "constraints_path": cpath, } def collate_examples(batch: list[dict]) -> dict[str, Any]: """Collate list of example dicts; keep list fields as lists (no stacking).""" if not batch: return {} keys = batch[0].keys() out: dict[str, Any] = {} for k in keys: vals = [b[k] for b in batch] out[k] = vals return out def group_by_parent( examples: list[tuple[Path, Path]], ) -> list[list[tuple[Path, Path]]]: """Group (src_dir, rel_path) by parent directory of rel_path for folder-by-folder processing.""" from itertools import groupby def parent_key(item: tuple[Path, Path]) -> Path: rel = item[1] return rel.parent if len(rel.parts) > 1 else Path(".") sorted_examples = sorted(examples, key=parent_key) groups: list[list[tuple[Path, Path]]] = [] for _key, group in groupby(sorted_examples, key=parent_key): groups.append(list(group)) return groups def _slice_output_at(output: dict[str, Any], index: int) -> dict[str, Any]: """Slice a (possibly nested) output dict at batch index for one sample.""" out: dict[str, Any] = {} for k, v in output.items(): if isinstance(v, dict): out[k] = _slice_output_at(v, index) elif isinstance(v, np.ndarray) and v.ndim > 0: out[k] = v[index] else: out[k] = v return out def _crop_output(output: dict[str, Any], num_frames: int) -> dict[str, Any]: """Crop a single-sample output dict along the time dimension (axis 0).""" out: dict[str, Any] = {} for k, v in output.items(): if isinstance(v, dict): out[k] = _crop_output(v, num_frames) elif isinstance(v, np.ndarray) and v.ndim >= 1: out[k] = v[:num_frames] else: out[k] = v return out def main(): device = "cuda:0" if torch.cuda.is_available() else "cpu" print(f"Using device: {device}") args = parse_args() testsuite_root = Path(args.benchmark).resolve() if args.output is not None: generated_root = Path(args.output).resolve() else: generated_root = testsuite_root in_place = generated_root == testsuite_root examples = discover_example_folders(testsuite_root) if not examples: raise SystemExit(f"No folders with meta.json found under {testsuite_root}") print(f"Discovered {len(examples)} example folders.") model, resolved_name = load_model( args.model, device=device, default_family="Kimodo", return_resolved_name=True, text_encoder_fp32=args.text_encoder_fp32, ) # v1.1 models are meant to be used for benchmark evaluation _deprecated_for_benchmark = { "kimodo-soma-rp-v1": "Kimodo-SOMA-RP-v1 was not trained to be compatible with the benchmark evaluation.", 
"kimodo-soma-seed-v1": "Kimodo-SOMA-SEED-v1 is not the latest model for benchmark evaluation.", } if resolved_name in _deprecated_for_benchmark: import warnings warnings.warn( f"Model '{args.model}' resolved to {resolved_name}: " f"{_deprecated_for_benchmark[resolved_name]} Consider using v1.1.", stacklevel=1, ) print(f"Generating with model: {resolved_name}") fps = model.fps default_diffusion_steps = args.diffusion_steps groups = group_by_parent(examples) total_generated = 0 total_skipped = 0 total_examples = len(examples) for group in groups: rel_path_0 = group[0][1] if rel_path_0.parent != Path("."): folder_label = str(rel_path_0.parent) else: # Direct children of testsuite root: show root name (e.g. inbetweening) folder_label = testsuite_root.name num_in_folder = len(group) print(f"Generating folder: {folder_label} ({num_in_folder} motions)") dataset = EvalExampleDataset( group, testsuite_root, generated_root, fps=fps, ) loader = DataLoader( dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_examples, ) folder_generated = 0 folder_skipped = 0 for batch_idx, batch in enumerate(loader): rel_paths = batch["rel_path"] src_dirs = batch["src_dir"] out_dirs = batch["out_dir"] metas = batch["meta"] batch_texts = batch["text"] batch_num_frames = batch["num_frame"] constraints_paths = batch["constraints_path"] # Filter out samples that are already generated (unless --overwrite). if args.overwrite: selected_indices = list(range(len(rel_paths))) else: selected_indices = [] for i, out_dir_str in enumerate(out_dirs): motion_path = Path(out_dir_str) / "motion.npz" if motion_path.is_file(): folder_skipped += 1 total_skipped += 1 continue selected_indices.append(i) if not selected_indices: print( f"\r Generated {folder_generated} / {num_in_folder} (skipped: {folder_skipped}) " f"(total: {total_generated + total_skipped} / {total_examples})", end="", flush=True, ) continue rel_paths = [rel_paths[i] for i in selected_indices] src_dirs = [src_dirs[i] for i in selected_indices] out_dirs = [out_dirs[i] for i in selected_indices] metas = [metas[i] for i in selected_indices] batch_texts = [batch_texts[i] for i in selected_indices] batch_num_frames = [batch_num_frames[i] for i in selected_indices] constraints_paths = [constraints_paths[i] for i in selected_indices] # Load constraints in main process on model device (no torch in workers) device_t = torch.device(device) batch_constraints_lst = [ load_constraints_lst(cpath, model.skeleton, device=device_t) if cpath else [] for cpath in constraints_paths ] if not in_place: for i in range(len(rel_paths)): copy_source_files(Path(src_dirs[i]), Path(out_dirs[i])) # Use first example's diffusion_steps and seed for the whole batch diffusion_steps = metas[0].get("diffusion_steps", default_diffusion_steps) seed = metas[0].get("seed", None) if seed is not None: seed_everything(seed) else: print("Warning: No seed found in meta.json, not seeding this batch.") # Single model call for the entire batch (count in bar title, bar clears when done) bar_desc = ( f" Generated {folder_generated} / {num_in_folder} " f"(skipped: {folder_skipped}) (total: {total_generated + total_skipped} / {total_examples})" ) output = model( batch_texts, batch_num_frames, constraint_lst=batch_constraints_lst, num_denoising_steps=diffusion_steps, multi_prompt=False, post_processing=args.postprocess, return_numpy=True, progress_bar=lambda x: tqdm(x, leave=False, desc=bar_desc), ) # Save each sample to its output dir B = len(batch_texts) for b in range(B): 
out_dir = Path(out_dirs[b]) sample_output = _slice_output_at(output, b) sample_output = _crop_output(sample_output, batch_num_frames[b]) motion_path = out_dir / "motion.npz" np.savez(motion_path, **sample_output) total_generated += 1 folder_generated += 1 print( f"\r Generated {folder_generated} / {num_in_folder} (skipped: {folder_skipped}) " f"(total: {total_generated + total_skipped} / {total_examples})", end="", flush=True, ) print() print( f" Finished folder {folder_label} ({num_in_folder} motions, " f"generated: {folder_generated}, skipped: {folder_skipped})." ) if in_place: print(f"Generated {total_generated} motions in-place under {testsuite_root}.") else: print(f"Generated {total_generated} motions under {generated_root}.") if __name__ == "__main__": main() ================================================ FILE: benchmark/parse_folder.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """ Step (5) of evaluation pipeline. Validate testcase result JSONs and aggregate benchmark rows. Expected testsuite layout (aligned with evaluate_folder output; <...> names are placeholders):

<root>/
├── <split>/                      # e.g. content, repetition
│   ├── text2motion/              # text-following eval
│   │   ├── overview/             # or timeline_single, timeline_multi
│   │   │   └── <testcase>.json
│   │   └── ...
│   └── <category>/               # constraints_withtext, constraints_notext
│       └── .../                  # optional subdirs, e.g. root, fullbody
│           ├── <testcase>/
│           └── <testcase>.json

Samples are discovered via rglob('meta.json') with motion.npz and gt_motion.npz in the same dir. Testcase dir = parent of a sample dir. Result file = testcase_dir.parent / f"{testcase_dir.name}.json". """ from __future__ import annotations import argparse import json from collections import defaultdict from pathlib import Path from typing import Any SPLITS = ("content", "repetition") TEXT_FOLLOWING_CATEGORIES = ("overview", "timeline_single", "timeline_multi") CONSTRAINTS_CATEGORIES = ("constraints_withtext", "constraints_notext") ROW_CATEGORIES = TEXT_FOLLOWING_CATEGORIES + CONSTRAINTS_CATEGORIES def _discover_sample_dirs(root: Path) -> list[Path]: sample_dirs: list[Path] = [] for meta_path in root.rglob("meta.json"): sample_dir = meta_path.parent if (sample_dir / "motion.npz").is_file() and (sample_dir / "gt_motion.npz").is_file(): sample_dirs.append(sample_dir) return sorted(set(sample_dirs)) def _discover_testcase_dirs(root: Path) -> list[Path]: sample_dirs = _discover_sample_dirs(root) return sorted({sample_dir.parent for sample_dir in sample_dirs}) def _expected_result_path(testcase_dir: Path) -> Path: return testcase_dir.parent / f"{testcase_dir.name}.json" def _parse_testcase_key(root: Path, testcase_dir: Path) -> tuple[str, str]: rel_parts = testcase_dir.relative_to(root).parts if len(rel_parts) < 2: raise ValueError(f"Unexpected testcase path shape: {testcase_dir} (relative: {'/'.join(rel_parts)})") split = rel_parts[0] if split not in SPLITS: raise ValueError(f"Unknown split '{split}' for testcase {testcase_dir}") if len(rel_parts) >= 3 and rel_parts[1] == "text2motion": category = rel_parts[2] if category not in TEXT_FOLLOWING_CATEGORIES: raise ValueError(f"Unknown text-following category '{category}' for testcase {testcase_dir}") else: category = rel_parts[1] if category not in CONSTRAINTS_CATEGORIES: raise ValueError(f"Unknown category '{category}' for testcase {testcase_dir}") return split, category def _accumulate_weighted( sum_acc: dict[str, float], weight_acc: dict[str, float], metric_dict:
dict[str, Any], weight: float, ) -> None: for metric_name, value in metric_dict.items(): if isinstance(value, (int, float)): sum_acc[metric_name] = sum_acc.get(metric_name, 0.0) + float(value) * weight weight_acc[metric_name] = weight_acc.get(metric_name, 0.0) + weight def _to_averages( weighted_sum: dict[str, float], weight: dict[str, float] ) -> dict[str, float]: return { k: v / weight[k] for k, v in sorted(weighted_sum.items()) if weight.get(k, 0.0) > 0 } def _load_result_row( result_path: Path, ) -> tuple[float, dict[str, Any], dict[str, Any], dict[str, Any]]: payload = json.loads(result_path.read_text(encoding="utf-8")) num_motions = float(payload.get("num_motions", 1)) per_motion_mean_gen = payload.get("per_motion_mean_gen") or payload.get("per_motion_mean", {}) per_motion_mean_gt = payload.get("per_motion_mean_gt") or {} tmr = payload.get("tmr") or {} if not isinstance(per_motion_mean_gen, dict): raise ValueError(f"'per_motion_mean_gen' / 'per_motion_mean' is not a dict in {result_path}") if not isinstance(per_motion_mean_gt, dict): raise ValueError(f"'per_motion_mean_gt' is not a dict in {result_path}") if not isinstance(tmr, dict): raise ValueError(f"'tmr' is not a dict in {result_path}") return num_motions, per_motion_mean_gen, per_motion_mean_gt, tmr # Display labels for table rows (paper-style). TEXT_FOLLOWING_ROW_LABELS = { "overview": "Overview", "timeline_single": "Timeline single", "timeline_multi": "Timeline multi", } CONSTRAINTS_ROW_LABELS = { "constraints_withtext": "Constraints with text", "constraints_notext": "Constraints without text", } # Meters to cm for constraint position metrics. M_TO_CM = 100.0 def _table_value(val: float | None) -> float | str | None: """Return value for JSON table; use None for missing (omit or serialize as null).""" if val is None: return None if isinstance(val, (int, float)) and (val != val or val == float("inf")): # nan or inf return None return val def _build_tables( row_acc: dict[tuple[str, str], dict[str, Any]], ) -> dict[str, dict[str, list[dict[str, Any]]]]: """Build text_following and constraints tables per split for paper-style output.""" tables: dict[str, dict[str, list[dict[str, Any]]]] = {} for split in SPLITS: tables[split] = {"text_following": [], "constraints": []} # Text-following table: Overview, Timeline single, Timeline multi. for category in TEXT_FOLLOWING_CATEGORIES: acc = row_acc[(split, category)] per_motion_gen = _to_averages(acc["per_motion_mean_weighted_sum"], acc["per_motion_mean_weight"]) per_motion_gt = _to_averages(acc["per_motion_mean_gt_weighted_sum"], acc["per_motion_mean_gt_weight"]) tmr_avg = _to_averages(acc["tmr_weighted_sum"], acc["tmr_weight"]) if acc["tmr_weight"] else {} r03_gen = tmr_avg.get("TMR/t2m_R/R03") r03_gt = tmr_avg.get("TMR/t2m_gt_R/R03") fid_gen_text = tmr_avg.get("TMR/FID/gen_text") fid_gt_text = tmr_avg.get("TMR/FID/gt_text") fid_gen_gt = tmr_avg.get("TMR/FID/gen_gt") # Skate is velocity in m/s; convert to cm/s for display. 
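# e.g. a stored 0.042 m/s renders as 4.20 cm/s in the table (illustrative value).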
skate_gen = per_motion_gen.get("foot_skate_from_pred_contacts") skate_gt = per_motion_gt.get("foot_skate_from_pred_contacts") contact_gen = per_motion_gen.get("foot_contact_consistency") contact_gt = per_motion_gt.get("foot_contact_consistency") row_label = TEXT_FOLLOWING_ROW_LABELS[category] tables[split]["text_following"].append( { "row": row_label, "R@3 (gen)": _table_value(r03_gen), "R@3 (GT)": _table_value(r03_gt), "FID gen-text": _table_value(fid_gen_text), "FID GT-text": _table_value(fid_gt_text), "FID gen-GT": _table_value(fid_gen_gt), "Skate (gen, cm/s)": _table_value(skate_gen * 100.0 if skate_gen is not None else None), "Skate (GT, cm/s)": _table_value(skate_gt * 100.0 if skate_gt is not None else None), "Contact (gen)": _table_value(contact_gen), "Contact (GT)": _table_value(contact_gt), } ) # Constraints table: Constraints with text, Constraints without text. for category in CONSTRAINTS_CATEGORIES: acc = row_acc[(split, category)] per_motion_gen = _to_averages(acc["per_motion_mean_weighted_sum"], acc["per_motion_mean_weight"]) per_motion_gt = _to_averages(acc["per_motion_mean_gt_weighted_sum"], acc["per_motion_mean_gt_weight"]) row_label = CONSTRAINTS_ROW_LABELS[category] row_dict: dict[str, Any] = { "row": row_label, "Full-Body Pos (gen, cm)": _table_value( per_motion_gen.get("constraint_fullbody_keyframe") * M_TO_CM if per_motion_gen.get("constraint_fullbody_keyframe") is not None else None ), "Full-Body Pos (GT, cm)": _table_value( per_motion_gt.get("constraint_fullbody_keyframe") * M_TO_CM if per_motion_gt.get("constraint_fullbody_keyframe") is not None else None ), "End-Effector Pos (gen, cm)": _table_value( per_motion_gen.get("constraint_end_effector") * M_TO_CM if per_motion_gen.get("constraint_end_effector") is not None else None ), "End-Effector Pos (GT, cm)": _table_value( per_motion_gt.get("constraint_end_effector") * M_TO_CM if per_motion_gt.get("constraint_end_effector") is not None else None ), "End-Effector Rot (deg)": None, # Not implemented in metrics. 
"2D Root Pos (gen, cm)": _table_value( per_motion_gen.get("constraint_root2d_err") * M_TO_CM if per_motion_gen.get("constraint_root2d_err") is not None else None ), "2D Root Pos (GT, cm)": _table_value( per_motion_gt.get("constraint_root2d_err") * M_TO_CM if per_motion_gt.get("constraint_root2d_err") is not None else None ), "2D Pelvis Pos@95% (gen, cm)": _table_value( per_motion_gen.get("constraint_root2d_err_p95") * M_TO_CM if per_motion_gen.get("constraint_root2d_err_p95") is not None else None ), "2D Pelvis Pos@95% (GT, cm)": _table_value( per_motion_gt.get("constraint_root2d_err_p95") * M_TO_CM if per_motion_gt.get("constraint_root2d_err_p95") is not None else None ), } tables[split]["constraints"].append(row_dict) return tables def _fmt_md(val: float | None, decimals: int) -> str: """Format a numeric value for a markdown cell, or '-' for None/NaN.""" if val is None: return "-" if isinstance(val, float) and (val != val or val == float("inf")): return "-" return f"{val:.{decimals}f}" def _print_tf_formatted_md( splits_data: list[tuple[str, list[dict[str, Any]]]], title: str, ) -> None: """Print text-following table in markdown, mirroring the terminal layout.""" groups = ["Overview", "Timeline single", "Timeline multi"] specs: list[tuple[str, int]] = [ ("R@3\u2191", 2), ("FID\u2193", 3), ("Skate\u2193", 3), ("Contact\u2191", 3), ] gt_keys = ["R@3 (GT)", None, "Skate (GT, cm/s)", "Contact (GT)"] gen_keys = ["R@3 (gen)", "FID gen-GT", "Skate (gen, cm/s)", "Contact (gen)"] gt_defaults: list[float | None] = [None, 0.0, None, None] headers = [""] for g in groups: for hdr, _ in specs: headers.append(f"{g} {hdr}") print(f"\n### {title}\n") print("| " + " | ".join(headers) + " |") print("| " + " | ".join("---" for _ in headers) + " |") for split_label, rows in splits_data: for row_type, keys, defaults in [ ("Ground Truth", gt_keys, gt_defaults), ("Method", gen_keys, [None] * len(specs)), ]: cells = [f"**{split_label}** {row_type}"] for row in rows: for j, (_, dec) in enumerate(specs): key = keys[j] val = defaults[j] if key is None else row.get(key) cells.append(_fmt_md(val, dec)) print("| " + " | ".join(cells) + " |") print() def _print_c_formatted_md( splits_data: list[tuple[str, list[dict[str, Any]]]], title: str, ) -> None: """Print constraints table in markdown, mirroring the terminal layout.""" groups = ["With text", "Without text"] specs: list[tuple[str, int]] = [ ("FB Pos\u2193", 3), ("EE Pos\u2193", 3), ("EE Rot\u2193", 3), ("2D Root\u2193", 3), ("Pelvis@95%", 2), ] gt_keys = [ "Full-Body Pos (GT, cm)", "End-Effector Pos (GT, cm)", "End-Effector Rot (deg)", "2D Root Pos (GT, cm)", "2D Pelvis Pos@95% (GT, cm)", ] gen_keys = [ "Full-Body Pos (gen, cm)", "End-Effector Pos (gen, cm)", "End-Effector Rot (deg)", "2D Root Pos (gen, cm)", "2D Pelvis Pos@95% (gen, cm)", ] headers = [""] for g in groups: for hdr, _ in specs: headers.append(f"{g} {hdr}") print(f"\n### {title}\n") print("| " + " | ".join(headers) + " |") print("| " + " | ".join("---" for _ in headers) + " |") for split_label, rows in splits_data: for row_type, keys in [("Ground Truth", gt_keys), ("Method", gen_keys)]: cells = [f"**{split_label}** {row_type}"] for row in rows: for j, (_, dec) in enumerate(specs): cells.append(_fmt_md(row.get(keys[j]), dec)) print("| " + " | ".join(cells) + " |") print() def _print_formatted_gt_method_md( tables: dict[str, dict[str, list[dict[str, Any]]]], ) -> None: """Print combined tables in markdown format, mirroring the terminal layout.""" tf_splits: list[tuple[str, list[dict[str, Any]]]] = [] 
c_splits: list[tuple[str, list[dict[str, Any]]]] = [] for split in SPLITS: split_tables = tables.get(split, {}) tf_rows = split_tables.get("text_following", []) c_rows = split_tables.get("constraints", []) if tf_rows and len(tf_rows) == 3: tf_splits.append((split.capitalize(), tf_rows)) if c_rows and len(c_rows) == 2: c_splits.append((split.capitalize(), c_rows)) if tf_splits: _print_tf_formatted_md(tf_splits, "Text-Following Evaluation") if c_splits: _print_c_formatted_md(c_splits, "Constrained Evaluation") def _fmt(val: float | None, decimals: int, width: int) -> str: """Format a numeric value right-aligned to *width*, or '-' for None.""" if val is None: return f"{'-':>{width}}" return f"{val:>{width}.{decimals}f}" def _print_grouped_rows( label: str, rows: list[dict[str, Any]], specs: list[tuple[str, int, int]], keys: list[str], mw: int, sep: str, ) -> None: """Print one data row across all column groups.""" parts = [f"{label:<{mw}}"] for i, row in enumerate(rows): if i: parts.append(sep) for j, (_, dec, w) in enumerate(specs): parts.append(_fmt(row.get(keys[j]), dec, w)) print("".join(parts)) def _print_tf_formatted( splits_data: list[tuple[str, list[dict[str, Any]]]], title: str, ) -> None: """Print text-following table with Overview / Timeline single / Timeline multi groups. *splits_data* is a list of ``(split_label, category_rows)`` tuples so that content and repetition splits appear as separate row-pairs inside one table. """ groups = ["Overview", "Timeline single", "Timeline multi"] specs: list[tuple[str, int, int]] = [ ("R@3\u2191", 2, 7), ("FID\u2193", 3, 7), ("Skate\u2193", 3, 9), ("Contact\u2191", 3, 10), ] gt_keys = ["R@3 (GT)", None, "Skate (GT, cm/s)", "Contact (GT)"] gen_keys = ["R@3 (gen)", "FID gen-GT", "Skate (gen, cm/s)", "Contact (gen)"] gt_defaults: list[float | None] = [None, 0.0, None, None] mw = 16 gw = sum(s[2] for s in specs) sep = " | " total_w = mw + len(groups) * gw + (len(groups) - 1) * len(sep) print(f"\n{title:^{total_w}}") print("=" * total_w) parts: list[str] = [" " * mw] for i, g in enumerate(groups): if i: parts.append(sep) parts.append(g.center(gw)) print("".join(parts)) parts = [f"{'':<{mw}}"] for i in range(len(groups)): if i: parts.append(sep) for hdr, _, w in specs: parts.append(f"{hdr:>{w}}") print("".join(parts)) parts = ["\u2500" * mw] for i in range(len(groups)): if i: parts.append("\u2500\u253c\u2500") parts.append("\u2500" * gw) print("".join(parts)) for si, (split_label, rows) in enumerate(splits_data): tag = f"\u2500\u2500 {split_label} " print(tag + "\u2500" * (total_w - len(tag))) parts = [f"{'Ground Truth':<{mw}}"] for i, row in enumerate(rows): if i: parts.append(sep) for j, (_, dec, w) in enumerate(specs): key = gt_keys[j] val = gt_defaults[j] if key is None else row.get(key) parts.append(_fmt(val, dec, w)) print("".join(parts)) _print_grouped_rows("Method", rows, specs, gen_keys, mw, sep) print() def _print_c_formatted( splits_data: list[tuple[str, list[dict[str, Any]]]], title: str, ) -> None: """Print constraints table with With text / Without text groups. *splits_data* is a list of ``(split_label, category_rows)`` tuples. 
""" groups = ["With text", "Without text"] specs: list[tuple[str, int, int]] = [ ("FB Pos\u2193", 3, 10), ("EE Pos\u2193", 3, 10), ("EE Rot\u2193", 3, 10), ("2D Root\u2193", 3, 11), ("Pelvis@95%", 2, 12), ] gt_keys = [ "Full-Body Pos (GT, cm)", "End-Effector Pos (GT, cm)", "End-Effector Rot (deg)", "2D Root Pos (GT, cm)", "2D Pelvis Pos@95% (GT, cm)", ] gen_keys = [ "Full-Body Pos (gen, cm)", "End-Effector Pos (gen, cm)", "End-Effector Rot (deg)", "2D Root Pos (gen, cm)", "2D Pelvis Pos@95% (gen, cm)", ] mw = 16 gw = sum(s[2] for s in specs) sep = " | " total_w = mw + len(groups) * gw + (len(groups) - 1) * len(sep) print(f"\n{title:^{total_w}}") print("=" * total_w) parts: list[str] = [" " * mw] for i, g in enumerate(groups): if i: parts.append(sep) parts.append(g.center(gw)) print("".join(parts)) parts = [f"{'':<{mw}}"] for i in range(len(groups)): if i: parts.append(sep) for hdr, _, w in specs: parts.append(f"{hdr:>{w}}") print("".join(parts)) parts = ["\u2500" * mw] for i in range(len(groups)): if i: parts.append("\u2500\u253c\u2500") parts.append("\u2500" * gw) print("".join(parts)) for si, (split_label, rows) in enumerate(splits_data): tag = f"\u2500\u2500 {split_label} " print(tag + "\u2500" * (total_w - len(tag))) _print_grouped_rows("Ground Truth", rows, specs, gt_keys, mw, sep) _print_grouped_rows("Method", rows, specs, gen_keys, mw, sep) print() def _print_formatted_gt_method( tables: dict[str, dict[str, list[dict[str, Any]]]], ) -> None: """Print combined tables with column groups separated by vertical bars. Content and repetition splits are shown as separate row-pairs inside one text-following table and one constraints table. """ tf_splits: list[tuple[str, list[dict[str, Any]]]] = [] c_splits: list[tuple[str, list[dict[str, Any]]]] = [] for split in SPLITS: split_tables = tables.get(split, {}) tf_rows = split_tables.get("text_following", []) c_rows = split_tables.get("constraints", []) if tf_rows and len(tf_rows) == 3: tf_splits.append((split.capitalize(), tf_rows)) if c_rows and len(c_rows) == 2: c_splits.append((split.capitalize(), c_rows)) if tf_splits: _print_tf_formatted(tf_splits, "Text-Following Evaluation") if c_splits: _print_c_formatted(c_splits, "Constrained Evaluation") def _build_summary(root: Path) -> dict[str, Any]: testcase_dirs = _discover_testcase_dirs(root) if not testcase_dirs: raise SystemExit( f"No testcase folders found under {root} (expected folders containing meta.json + motion.npz + gt_motion.npz samples)." 
) missing_results: list[Path] = [] for testcase_dir in testcase_dirs: result_path = _expected_result_path(testcase_dir) if not result_path.is_file(): missing_results.append(result_path) if missing_results: missing_text = "\n".join(str(path) for path in missing_results) raise SystemExit(f"Missing {len(missing_results)} testcase result JSON files:\n{missing_text}") row_acc: dict[tuple[str, str], dict[str, Any]] = {} for split in SPLITS: for category in ROW_CATEGORIES: row_acc[(split, category)] = { "num_testcases": 0, "num_motions": 0.0, "per_motion_mean_weighted_sum": {}, "per_motion_mean_weight": {}, "per_motion_mean_gt_weighted_sum": {}, "per_motion_mean_gt_weight": {}, "tmr_weighted_sum": {}, "tmr_weight": {}, } for testcase_dir in testcase_dirs: split, category = _parse_testcase_key(root, testcase_dir) result_path = _expected_result_path(testcase_dir) num_motions, per_motion_mean_gen, per_motion_mean_gt, tmr = _load_result_row(result_path) acc = row_acc[(split, category)] acc["num_testcases"] += 1 acc["num_motions"] += num_motions _accumulate_weighted( acc["per_motion_mean_weighted_sum"], acc["per_motion_mean_weight"], per_motion_mean_gen, num_motions, ) if per_motion_mean_gt: _accumulate_weighted( acc["per_motion_mean_gt_weighted_sum"], acc["per_motion_mean_gt_weight"], per_motion_mean_gt, num_motions, ) if tmr: _accumulate_weighted( acc["tmr_weighted_sum"], acc["tmr_weight"], tmr, num_motions, ) rows: list[dict[str, Any]] = [] for split in SPLITS: for category in ROW_CATEGORIES: acc = row_acc[(split, category)] tmr_avg = _to_averages(acc["tmr_weighted_sum"], acc["tmr_weight"]) if acc["tmr_weight"] else {} per_motion_gt_avg = _to_averages(acc["per_motion_mean_gt_weighted_sum"], acc["per_motion_mean_gt_weight"]) row_dict: dict[str, Any] = { "split": split, "category": category, "num_testcases": acc["num_testcases"], "num_motions": int(acc["num_motions"]), "per_motion_mean": _to_averages(acc["per_motion_mean_weighted_sum"], acc["per_motion_mean_weight"]), "tmr": tmr_avg, } if per_motion_gt_avg: row_dict["per_motion_mean_gt"] = per_motion_gt_avg rows.append(row_dict) # Combined constraints row for this split. 
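# Motion-weighted merge; e.g. (illustrative) an error of 0.02 over 10 motions (with text) and 0.04 over 30 motions (no text) combine to (0.02 * 10 + 0.04 * 30) / 40 = 0.035.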
withtext = row_acc[(split, "constraints_withtext")] notext = row_acc[(split, "constraints_notext")] combined_per_motion = defaultdict(float) combined_per_motion_weight = defaultdict(float) combined_per_motion_gt = defaultdict(float) combined_per_motion_gt_weight = defaultdict(float) combined_tmr = defaultdict(float) combined_tmr_weight = defaultdict(float) for sum_key, weight_key, sum_acc, weight_acc in ( ("per_motion_mean_weighted_sum", "per_motion_mean_weight", combined_per_motion, combined_per_motion_weight), ("per_motion_mean_gt_weighted_sum", "per_motion_mean_gt_weight", combined_per_motion_gt, combined_per_motion_gt_weight), ("tmr_weighted_sum", "tmr_weight", combined_tmr, combined_tmr_weight), ): for src in (withtext, notext): for k, v in src[sum_key].items(): sum_acc[k] += v for k, w in src[weight_key].items(): weight_acc[k] += w combined_tmr_avg = _to_averages(dict(combined_tmr), dict(combined_tmr_weight)) if combined_tmr_weight else {} combined_gt_avg = _to_averages(dict(combined_per_motion_gt), dict(combined_per_motion_gt_weight)) combined_row: dict[str, Any] = { "split": split, "category": "constraints", "num_testcases": withtext["num_testcases"] + notext["num_testcases"], "num_motions": int(withtext["num_motions"] + notext["num_motions"]), "per_motion_mean": _to_averages(dict(combined_per_motion), dict(combined_per_motion_weight)), "tmr": combined_tmr_avg, } if combined_gt_avg: combined_row["per_motion_mean_gt"] = combined_gt_avg rows.append(combined_row) tables = _build_tables(row_acc) return { "folder": str(root), "num_testcases": len(testcase_dirs), "rows": rows, "tables": tables, } def main() -> None: parser = argparse.ArgumentParser( description=("Validate testcase <testcase>.json result files and aggregate averages by split/category.") ) parser.add_argument( "folder", type=Path, help="Testsuite root folder (contains content/ and repetition/).", ) parser.add_argument( "--output", type=Path, default=None, help="Optional output JSON path. Default: <folder>/summary_rows.json", ) parser.add_argument( "--format", choices=["terminal", "md"], default="terminal", dest="table_format", help="Table output format: 'terminal' (default) for fixed-width tables, 'md' for markdown.", ) args = parser.parse_args() folder = args.folder.resolve() if not folder.is_dir(): raise SystemExit(f"Folder does not exist: {folder}") summary = _build_summary(folder) out_path = args.output.resolve() if args.output else folder / "summary_rows.json" out_path.write_text(json.dumps(summary, indent=2) + "\n", encoding="utf-8") print(f"Wrote aggregated summary: {out_path}") print(f"Rows: {len(summary['rows'])}, testcases: {summary['num_testcases']}") if args.table_format == "md": _print_formatted_gt_method_md(summary["tables"]) else: _print_formatted_gt_method(summary["tables"]) if __name__ == "__main__": main() ================================================ FILE: docker-compose.yaml ================================================ services: text-encoder: build: context: . dockerfile: Dockerfile image: kimodo:1.0 container_name: text-encoder working_dir: /workspace command: python -m kimodo.scripts.run_text_encoder_server volumes: - ./:/workspace # Cache HF downloads in host "system-wide" Hugging Face cache. - ${HOME}/.cache/huggingface:/workspace/.cache/huggingface # Mount the host HF auth token at the standard cache location in-container.
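# If you have not logged in yet, running huggingface-cli login on the host typically creates ~/.cache/huggingface/token; the :ro flag below keeps containers from modifying it.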
- ${HOME}/.cache/huggingface/token:/workspace/.cache/huggingface/token:ro # expose to your host browser ports: - "9550:9550" environment: # Make Gradio reachable from other containers # - GRADIO_SERVER_NAME=0.0.0.0 # - GRADIO_SERVER_PORT=9550 - HF_HOME=/workspace/.cache/huggingface # Host user mapping (for non-root ownership + proper shell prompt) - HOST_USER=${USER:-user} # GPU - NVIDIA_VISIBLE_DEVICES=all - NVIDIA_DRIVER_CAPABILITIES=compute,utility shm_size: "16gb" ipc: host # Wait until Gradio responds on HTTP healthcheck: test: ["CMD", "bash", "-lc", "curl -fsS http://localhost:9550/ > /dev/null"] interval: 3s timeout: 2s retries: 40 deploy: resources: reservations: devices: - driver: nvidia count: all capabilities: [gpu] demo: build: context: . dockerfile: Dockerfile image: kimodo:1.0 container_name: demo working_dir: /workspace command: python -m kimodo.demo volumes: - ./:/workspace - ${HOME}/.cache/huggingface:/workspace/.cache/huggingface - ${HOME}/.cache/huggingface/token:/workspace/.cache/huggingface/token:ro # Explicit checkpoint mount (avoids surprises if the repo bind mount isn't what you expect). - ./checkpoints:/workspace/checkpoints:ro ports: - "${SERVER_PORT:-7860}:${SERVER_PORT:-7860}" environment: # Point the model at the text-encoder service. - TEXT_ENCODER_URL=http://text-encoder:9550/ # Make checkpoint paths robust (Hydra config reads this). - SERVER_PORT=${SERVER_PORT:-7860} - HF_HOME=/workspace/.cache/huggingface # Host user mapping (for non-root ownership + proper shell prompt) - HOST_USER=${USER:-user} # GPU - NVIDIA_VISIBLE_DEVICES=all - NVIDIA_DRIVER_CAPABILITIES=compute,utility shm_size: "16gb" ipc: host depends_on: text-encoder: condition: service_healthy deploy: resources: reservations: devices: - driver: nvidia count: all capabilities: [gpu] ================================================ FILE: docker_requirements.in ================================================ # # Human-maintained direct dependencies (top-level). # Use `uv` to compile this into a fully pinned `requirements.txt` lockfile. # # IMPORTANT: # - We intentionally do NOT list `torch` here because the Docker image base # (`nvcr.io/nvidia/pytorch`) already provides it. Installing torch via pip # during image build is slow and can lead to ABI/CUDA mismatches. # - If you are NOT using Docker, install an appropriate PyTorch build separately. # # Config / wiring hydra-core>=1.3 omegaconf>=2.3 # Core numerics numpy>=1.23,<2 scipy>=1.10,<2 # Model / embeddings # NOTE: `kimodo/model/llm2vec` has only been tested with transformers==5.1.0 transformers==5.1.0 urllib3>=2.6.3 boto3 peft>=0.12 einops>=0.7 # Misc tqdm>=4.0 packaging>=21.0 pydantic>=2.0 # UI / client filelock>=3.20.3 gradio>=6.8.0 gradio_client>=1.0 # Visualization trimesh>=3.21.7 scenepic>=1.1.0 pillow>=9.0 av>=16.1.0 py-soma-x @ git+https://github.com/NVlabs/SOMA-X.git # Local packages (editable installs for viser and kimodo; MotionCorrection non-editable) ./MotionCorrection -e . -e ./kimodo-viser ================================================ FILE: docker_requirements.txt ================================================ # This file was autogenerated by uv via the following command: # NOTE: `torch` (and its CUDA wheels) are intentionally omitted from this lockfile. # The Docker base image (nvcr.io/nvidia/pytorch) already provides a tested PyTorch build. # # uv pip compile docker_requirements.in -o docker_requirements.txt --python-version 3.10 --python-platform x86_64-manylinux2014 -e .
# via -r docker_requirements.in -e ./kimodo-viser # via -r docker_requirements.in py-soma-x @ git+https://github.com/NVlabs/SOMA-X.git # via -r docker_requirements.in accelerate==1.13.0 # via peft aiofiles==24.1.0 # via gradio annotated-doc==0.0.4 # via # fastapi # typer annotated-types==0.7.0 # via pydantic antlr4-python3-runtime==4.9.3 # via # hydra-core # omegaconf anyio==4.12.1 # via # gradio # httpx # starlette attrs==25.4.0 # via # jsonschema # referencing av==16.1.0 # via # -r docker_requirements.in # kimodo boto3==1.42.66 # via # -r docker_requirements.in # kimodo botocore==1.42.66 # via # boto3 # s3transfer brotli==1.2.0 # via gradio certifi==2026.2.25 # via # httpcore # httpx # requests charset-normalizer==3.4.5 # via # requests # trimesh click==8.3.1 # via # typer # uvicorn colorlog==6.10.1 # via trimesh einops==0.8.2 # via # -r docker_requirements.in # kimodo embreex==2.17.7.post7 # via trimesh exceptiongroup==1.3.1 # via anyio fastapi==0.135.1 # via gradio ffmpy==1.0.0 # via gradio filelock==3.25.2 # via # -r docker_requirements.in # huggingface-hub # kimodo # torch fsspec==2026.2.0 # via # gradio-client # huggingface-hub # torch gradio==6.9.0 # via # -r docker_requirements.in # kimodo gradio-client==2.3.0 # via # -r docker_requirements.in # gradio # kimodo groovy==0.1.2 # via gradio h11==0.16.0 # via # httpcore # uvicorn hf-xet==1.4.0 # via huggingface-hub httpcore==1.0.9 # via httpx httpx==0.28.1 # via # gradio # gradio-client # huggingface-hub # safehttpx # trimesh huggingface-hub==1.6.0 # via # accelerate # gradio # gradio-client # peft # tokenizers # transformers hydra-core==1.3.2 # via # -r docker_requirements.in # kimodo idna==3.11 # via # anyio # httpx # requests imageio==2.37.3 # via viser jinja2==3.1.6 # via # gradio # torch jmespath==1.1.0 # via # boto3 # botocore jsonschema==4.26.0 # via trimesh jsonschema-specifications==2025.9.1 # via jsonschema lxml==6.0.2 # via # trimesh # yourdfpy manifold3d==3.4.0 # via trimesh mapbox-earcut==2.0.0 # via trimesh markdown-it-py==4.0.0 # via rich markupsafe==3.0.3 # via # gradio # jinja2 mdurl==0.1.2 # via markdown-it-py ./MotionCorrection # via -r docker_requirements.in msgspec==0.20.0 # via viser nodeenv==1.10.0 # via viser numpy==1.26.4 # via # -r docker_requirements.in # accelerate # embreex # gradio # imageio # kimodo # manifold3d # mapbox-earcut # motion-correction # pandas # peft # pycollada # scenepic # scipy # shapely # transformers # trimesh # vhacdx # viser # yourdfpy omegaconf==2.3.0 # via # -r docker_requirements.in # hydra-core # kimodo orjson==3.11.7 # via gradio packaging==26.0 # via # -r docker_requirements.in # accelerate # gradio # gradio-client # huggingface-hub # hydra-core # kimodo # peft # transformers pandas==2.3.3 # via gradio peft==0.18.1 # via # -r docker_requirements.in # kimodo pillow==12.1.1 # via # -r docker_requirements.in # gradio # imageio # kimodo # scenepic # trimesh psutil==7.2.2 # via # accelerate # peft pycollada==0.9.3 # via trimesh pydantic==2.12.5 # via # -r docker_requirements.in # fastapi # gradio # kimodo pydantic-core==2.41.5 # via pydantic pydub==0.25.1 # via gradio pygments==2.19.2 # via rich python-dateutil==2.9.0.post0 # via # botocore # pandas # pycollada python-multipart==0.0.22 # via gradio pytz==2026.1.post1 # via # gradio # pandas pyyaml==6.0.3 # via # accelerate # gradio # huggingface-hub # omegaconf # peft # transformers referencing==0.37.0 # via # jsonschema # jsonschema-specifications regex==2026.2.28 # via transformers requests==2.32.5 # via viser rich==14.3.3 # via # 
typer # viser rpds-py==0.30.0 # via # jsonschema # referencing rtree==1.4.1 # via trimesh s3transfer==0.16.0 # via boto3 safehttpx==0.1.7 # via gradio safetensors==0.7.0 # via # accelerate # peft # transformers scenepic==1.1.2 # via # -r docker_requirements.in # kimodo scipy==1.15.3 # via # -r docker_requirements.in # kimodo # scenepic # trimesh semantic-version==2.10.0 # via gradio shapely==2.1.2 # via trimesh shellingham==1.5.4 # via typer six==1.17.0 # via # python-dateutil # yourdfpy starlette==0.52.1 # via # fastapi # gradio svg-path==7.0 # via trimesh tokenizers==0.22.2 # via transformers tomlkit==0.13.3 # via gradio tqdm==4.67.3 # via # -r docker_requirements.in # huggingface-hub # kimodo # peft # transformers # viser transformers==5.1.0 # via # -r docker_requirements.in # kimodo # peft trimesh==4.11.3 # via # -r docker_requirements.in # kimodo # viser # yourdfpy typer==0.24.1 # via # gradio # huggingface-hub # typer-slim typer-slim==0.24.0 # via transformers typing-extensions==4.15.0 # via # anyio # exceptiongroup # fastapi # gradio # gradio-client # huggingface-hub # pydantic # pydantic-core # referencing # starlette # torch # typing-inspection # uvicorn # viser typing-inspection==0.4.2 # via # fastapi # pydantic tzdata==2025.3 # via pandas urllib3==2.6.3 # via # -r docker_requirements.in # botocore # kimodo # requests uvicorn==0.41.0 # via gradio vhacdx==0.0.10 # via trimesh websockets==15.0.1 # via viser xxhash==3.6.0 # via trimesh yourdfpy==0.0.60 # via viser ================================================ FILE: docs/.gitattributes ================================================ source/_static/quick_tour.mp4 filter=lfs diff=lfs merge=lfs -text ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = source BUILDDIR = build .PHONY: help Makefile apidoc # Catch-all target: route all unknown targets to Sphinx %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) apidoc: @$(SPHINXBUILD) -b html -q "$(SOURCEDIR)" "$(BUILDDIR)" >/dev/null 2>&1 || true @sphinx-apidoc -o "$(SOURCEDIR)/api_reference/_generated" -t "$(SOURCEDIR)/_templates/apidoc" ../kimodo ../kimodo/**/tests* ../kimodo/**/test* -f ================================================ FILE: docs/README.md ================================================ # Documentation ## Local build Install doc dependencies: ```bash pip install -r docs/requirements.txt ``` Build HTML: ```bash cd docs make html ``` Open the output at `docs/build/html/index.html`. ## API reference generation Generate API stubs from the Python packages: ```bash cd docs make apidoc make html ``` Note: generated stubs are written to `docs/source/api_reference/_generated` and are not included in the default navigation. Add them to a toctree if you want to expose them. 
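To preview the built site, Python's built-in web server is sufficient (a convenience suggestion, not part of the repo's tooling; the port is arbitrary):

```bash
python -m http.server 8000 --directory docs/build/html
```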
================================================ FILE: docs/make.bat ================================================ @ECHO OFF pushd %~dp0 set SPHINXOPTS= set SPHINXBUILD=sphinx-build set SOURCEDIR=source set BUILDDIR=build if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd ================================================ FILE: docs/requirements.txt ================================================ sphinx>=7.0,<9.0 nvidia-sphinx-theme sphinx-copybutton myst-parser sphinx-design ================================================ FILE: docs/source/_static/custom.css ================================================ .hero { padding: 2.5rem 2rem; border-radius: 12px; background: linear-gradient(135deg, #0f1a0c 0%, #1c2b16 55%, #76b900 100%); color: #f8f9fb; margin: 1.5rem 0 2rem 0; } .hero-title { font-size: 2.2rem; margin: 0 0 0.6rem 0; } .hero-subtitle { font-size: 1.1rem; margin: 0 0 1.2rem 0; opacity: 0.9; } .hero-actions a { display: inline-block; margin-right: 0.8rem; padding: 0.5rem 0.9rem; border-radius: 6px; background: #76b900; color: #0f1a0c; text-decoration: none; font-weight: 600; } .hero-actions a.secondary { background: transparent; color: #f8f9fb; border: 1px solid #f8f9fb; } .card-grid { display: grid; gap: 1rem; grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); margin: 1.5rem 0 2rem 0; } .card { border: 1px solid rgba(0, 0, 0, 0.08); border-radius: 10px; padding: 1rem 1.2rem; background: #ffffff; } .card h3 { margin-top: 0; margin-bottom: 0.4rem; } .card p { margin: 0; color: #3c4758; } .quick-links { display: flex; flex-wrap: wrap; gap: 0.8rem; margin: 1rem 0 2rem 0; } .quick-links a { display: inline-block; padding: 0.4rem 0.8rem; border-radius: 999px; background: #edf2f7; color: #1a202c; text-decoration: none; font-weight: 600; } ================================================ FILE: docs/source/_templates/apidoc/module.rst.jinja ================================================ {%- if show_headings %} {{- [basename, "module"] | join(' ') | e | heading }} {% endif -%} .. automodule:: {{ qualname }} {%- set preferred_order = ['members', 'undoc-members', 'show-inheritance'] %} {%- for option in preferred_order %} {%- if option in automodule_options %} :{{ option }}: {%- endif %} {%- endfor %} {%- for option in automodule_options %} {%- if option not in preferred_order %} :{{ option }}: {%- endif %} {%- endfor %} ================================================ FILE: docs/source/_templates/apidoc/package.rst.jinja ================================================ {%- set preferred_order = ['members', 'undoc-members', 'show-inheritance'] %} {%- macro automodule(modname, options) -%} .. automodule:: {{ modname }} {%- for option in preferred_order %} {%- if option in options %} :{{ option }}: {%- endif %} {%- endfor %} {%- for option in options %} {%- if option not in preferred_order %} :{{ option }}: {%- endif %} {%- endfor %} {%- endmacro %} {%- macro toctree(docnames) -%} .. toctree:: :maxdepth: {{ maxdepth }} {% for docname in docnames %} {{ docname }} {%- endfor %} {%- endmacro %} {%- if is_namespace %} {{- [pkgname, "namespace"] | join(" ") | e | heading }} {% else %} {{- [pkgname, "package"] | join(" ") | e | heading }} {% endif %} {%- if is_namespace %} .. 
py:module:: {{ pkgname }} {% endif %} {%- if modulefirst and not is_namespace %} {{ automodule(pkgname, automodule_options) }} {% endif %} {%- if subpackages %} Subpackages ----------- {{ toctree(subpackages) }} {% endif %} {%- if submodules %} Submodules ---------- {% if separatemodules %} {{ toctree(submodules) }} {% else %} {%- for submodule in submodules %} {% if show_headings %} {{- [submodule, "module"] | join(" ") | e | heading(2) }} {% endif %} {{ automodule(submodule, automodule_options) }} {% endfor %} {%- endif %} {%- endif %} {%- if not modulefirst and not is_namespace %} Module contents --------------- {{ automodule(pkgname, automodule_options) }} {% endif %} ================================================ FILE: docs/source/api_reference/_generated/kimodo.demo.rst ================================================ kimodo.demo package =================== Submodules ---------- kimodo.demo.app module ---------------------- .. automodule:: kimodo.demo.app :members: :undoc-members: :show-inheritance: kimodo.demo.config module ------------------------- .. automodule:: kimodo.demo.config :members: :undoc-members: :show-inheritance: kimodo.demo.embedding\_cache module ----------------------------------- .. automodule:: kimodo.demo.embedding_cache :members: :undoc-members: :show-inheritance: kimodo.demo.generation module ----------------------------- .. automodule:: kimodo.demo.generation :members: :undoc-members: :show-inheritance: kimodo.demo.queue\_manager module --------------------------------- .. automodule:: kimodo.demo.queue_manager :members: :undoc-members: :show-inheritance: kimodo.demo.state module ------------------------ .. automodule:: kimodo.demo.state :members: :undoc-members: :show-inheritance: kimodo.demo.ui module --------------------- .. automodule:: kimodo.demo.ui :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.demo :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.exports.rst ================================================ kimodo.exports package ====================== Submodules ---------- kimodo.exports.bvh module ------------------------- .. automodule:: kimodo.exports.bvh :members: :undoc-members: :show-inheritance: kimodo.exports.motion\_convert\_lib module ------------------------------------------ .. automodule:: kimodo.exports.motion_convert_lib :members: :undoc-members: :show-inheritance: kimodo.exports.motion\_formats module ------------------------------------- .. automodule:: kimodo.exports.motion_formats :members: :undoc-members: :show-inheritance: kimodo.exports.motion\_io module -------------------------------- .. automodule:: kimodo.exports.motion_io :members: :undoc-members: :show-inheritance: kimodo.exports.mujoco module ---------------------------- .. automodule:: kimodo.exports.mujoco :members: :undoc-members: :show-inheritance: kimodo.exports.smplx module --------------------------- .. automodule:: kimodo.exports.smplx :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.exports :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.metrics.rst ================================================ kimodo.metrics package ====================== Submodules ---------- kimodo.metrics.base module -------------------------- .. 
automodule:: kimodo.metrics.base :members: :undoc-members: :show-inheritance: kimodo.metrics.constraints module --------------------------------- .. automodule:: kimodo.metrics.constraints :members: :undoc-members: :show-inheritance: kimodo.metrics.foot\_skate module --------------------------------- .. automodule:: kimodo.metrics.foot_skate :members: :undoc-members: :show-inheritance: kimodo.metrics.tmr module ------------------------- .. automodule:: kimodo.metrics.tmr :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.metrics :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.model.llm2vec.models.rst ================================================ kimodo.model.llm2vec.models package =================================== Submodules ---------- kimodo.model.llm2vec.models.attn\_mask\_utils module ---------------------------------------------------- .. automodule:: kimodo.model.llm2vec.models.attn_mask_utils :members: :undoc-members: :show-inheritance: kimodo.model.llm2vec.models.bidirectional\_llama module ------------------------------------------------------- .. automodule:: kimodo.model.llm2vec.models.bidirectional_llama :members: :undoc-members: :show-inheritance: kimodo.model.llm2vec.models.utils module ---------------------------------------- .. automodule:: kimodo.model.llm2vec.models.utils :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.model.llm2vec.models :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.model.llm2vec.rst ================================================ kimodo.model.llm2vec package ============================ Subpackages ----------- .. toctree:: :maxdepth: 4 kimodo.model.llm2vec.models Submodules ---------- kimodo.model.llm2vec.llm2vec module ----------------------------------- .. automodule:: kimodo.model.llm2vec.llm2vec :members: :undoc-members: :show-inheritance: kimodo.model.llm2vec.llm2vec\_wrapper module -------------------------------------------- .. automodule:: kimodo.model.llm2vec.llm2vec_wrapper :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.model.llm2vec :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.model.rst ================================================ kimodo.model package ==================== Subpackages ----------- .. toctree:: :maxdepth: 4 kimodo.model.llm2vec Submodules ---------- kimodo.model.backbone module ---------------------------- .. automodule:: kimodo.model.backbone :members: :undoc-members: :show-inheritance: kimodo.model.cfg module ----------------------- .. automodule:: kimodo.model.cfg :members: :undoc-members: :show-inheritance: kimodo.model.common module -------------------------- .. automodule:: kimodo.model.common :members: :undoc-members: :show-inheritance: kimodo.model.diffusion module ----------------------------- .. automodule:: kimodo.model.diffusion :members: :undoc-members: :show-inheritance: kimodo.model.kimodo\_model module --------------------------------- .. automodule:: kimodo.model.kimodo_model :members: :undoc-members: :show-inheritance: kimodo.model.load\_model module ------------------------------- .. 
automodule:: kimodo.model.load_model :members: :undoc-members: :show-inheritance: kimodo.model.loading module --------------------------- .. automodule:: kimodo.model.loading :members: :undoc-members: :show-inheritance: kimodo.model.registry module ---------------------------- .. automodule:: kimodo.model.registry :members: :undoc-members: :show-inheritance: kimodo.model.text\_encoder\_api module -------------------------------------- .. automodule:: kimodo.model.text_encoder_api :members: :undoc-members: :show-inheritance: kimodo.model.tmr module ----------------------- .. automodule:: kimodo.model.tmr :members: :undoc-members: :show-inheritance: kimodo.model.twostage\_denoiser module -------------------------------------- .. automodule:: kimodo.model.twostage_denoiser :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.model :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.motion_rep.reps.rst ================================================ kimodo.motion\_rep.reps package =============================== Submodules ---------- kimodo.motion\_rep.reps.base module ----------------------------------- .. automodule:: kimodo.motion_rep.reps.base :members: :undoc-members: :show-inheritance: kimodo.motion\_rep.reps.kimodo\_motionrep module ------------------------------------------------ .. automodule:: kimodo.motion_rep.reps.kimodo_motionrep :members: :undoc-members: :show-inheritance: kimodo.motion\_rep.reps.tmr\_motionrep module --------------------------------------------- .. automodule:: kimodo.motion_rep.reps.tmr_motionrep :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.motion_rep.reps :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.motion_rep.rst ================================================ kimodo.motion\_rep package ========================== Subpackages ----------- .. toctree:: :maxdepth: 4 kimodo.motion_rep.reps Submodules ---------- kimodo.motion\_rep.conditioning module -------------------------------------- .. automodule:: kimodo.motion_rep.conditioning :members: :undoc-members: :show-inheritance: kimodo.motion\_rep.feature\_utils module ---------------------------------------- .. automodule:: kimodo.motion_rep.feature_utils :members: :undoc-members: :show-inheritance: kimodo.motion\_rep.feet module ------------------------------ .. automodule:: kimodo.motion_rep.feet :members: :undoc-members: :show-inheritance: kimodo.motion\_rep.smooth\_root module -------------------------------------- .. automodule:: kimodo.motion_rep.smooth_root :members: :undoc-members: :show-inheritance: kimodo.motion\_rep.stats module ------------------------------- .. automodule:: kimodo.motion_rep.stats :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.motion_rep :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.rst ================================================ kimodo package ============== Subpackages ----------- .. toctree:: :maxdepth: 4 kimodo.demo kimodo.exports kimodo.metrics kimodo.model kimodo.motion_rep kimodo.scripts kimodo.skeleton kimodo.viz Submodules ---------- kimodo.assets module -------------------- .. 
automodule:: kimodo.assets :members: :undoc-members: :show-inheritance: kimodo.constraints module ------------------------- .. automodule:: kimodo.constraints :members: :undoc-members: :show-inheritance: kimodo.geometry module ---------------------- .. automodule:: kimodo.geometry :members: :undoc-members: :show-inheritance: kimodo.meta module ------------------ .. automodule:: kimodo.meta :members: :undoc-members: :show-inheritance: kimodo.postprocess module ------------------------- .. automodule:: kimodo.postprocess :members: :undoc-members: :show-inheritance: kimodo.sanitize module ---------------------- .. automodule:: kimodo.sanitize :members: :undoc-members: :show-inheritance: kimodo.tools module ------------------- .. automodule:: kimodo.tools :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.scripts.rst ================================================ kimodo.scripts package ====================== Submodules ---------- kimodo.scripts.generate module ------------------------------ .. automodule:: kimodo.scripts.generate :members: :undoc-members: :show-inheritance: kimodo.scripts.gradio\_theme module ----------------------------------- .. automodule:: kimodo.scripts.gradio_theme :members: :undoc-members: :show-inheritance: kimodo.scripts.lock\_requirements module ---------------------------------------- .. automodule:: kimodo.scripts.lock_requirements :members: :undoc-members: :show-inheritance: kimodo.scripts.motion\_convert module ------------------------------------- .. automodule:: kimodo.scripts.motion_convert :members: :undoc-members: :show-inheritance: kimodo.scripts.mujoco\_load module ---------------------------------- .. automodule:: kimodo.scripts.mujoco_load :members: :undoc-members: :show-inheritance: kimodo.scripts.run\_text\_encoder\_server module ------------------------------------------------ .. automodule:: kimodo.scripts.run_text_encoder_server :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.scripts :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.skeleton.rst ================================================ kimodo.skeleton package ======================= Submodules ---------- kimodo.skeleton.base module --------------------------- .. automodule:: kimodo.skeleton.base :members: :undoc-members: :show-inheritance: kimodo.skeleton.bvh module -------------------------- .. automodule:: kimodo.skeleton.bvh :members: :undoc-members: :show-inheritance: kimodo.skeleton.definitions module ---------------------------------- .. automodule:: kimodo.skeleton.definitions :members: :undoc-members: :show-inheritance: kimodo.skeleton.kinematics module --------------------------------- .. automodule:: kimodo.skeleton.kinematics :members: :undoc-members: :show-inheritance: kimodo.skeleton.registry module ------------------------------- .. automodule:: kimodo.skeleton.registry :members: :undoc-members: :show-inheritance: kimodo.skeleton.transforms module --------------------------------- .. automodule:: kimodo.skeleton.transforms :members: :undoc-members: :show-inheritance: Module contents --------------- .. 
automodule:: kimodo.skeleton :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.viz.rst ================================================ kimodo.viz package ================== Submodules ---------- kimodo.viz.constraint\_ui module -------------------------------- .. automodule:: kimodo.viz.constraint_ui :members: :undoc-members: :show-inheritance: kimodo.viz.coords module ------------------------ .. automodule:: kimodo.viz.coords :members: :undoc-members: :show-inheritance: kimodo.viz.g1\_rig module ------------------------- .. automodule:: kimodo.viz.g1_rig :members: :undoc-members: :show-inheritance: kimodo.viz.gui module --------------------- .. automodule:: kimodo.viz.gui :members: :undoc-members: :show-inheritance: kimodo.viz.playback module -------------------------- .. automodule:: kimodo.viz.playback :members: :undoc-members: :show-inheritance: kimodo.viz.scene module ----------------------- .. automodule:: kimodo.viz.scene :members: :undoc-members: :show-inheritance: kimodo.viz.smplx\_skin module ----------------------------- .. automodule:: kimodo.viz.smplx_skin :members: :undoc-members: :show-inheritance: kimodo.viz.soma\_layer\_skin module ----------------------------------- .. automodule:: kimodo.viz.soma_layer_skin :members: :undoc-members: :show-inheritance: kimodo.viz.soma\_skin module ---------------------------- .. automodule:: kimodo.viz.soma_skin :members: :undoc-members: :show-inheritance: kimodo.viz.viser\_utils module ------------------------------ .. automodule:: kimodo.viz.viser_utils :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.viz :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/modules.rst ================================================ kimodo ====== .. toctree:: :maxdepth: 4 kimodo ================================================ FILE: docs/source/api_reference/constraints.rst ================================================ Constraints =========== Constraint definitions and utilities. .. automodule:: kimodo.constraints :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/exports.rst ================================================ Exports ======= Export utilities for common formats. .. automodule:: kimodo.exports.bvh :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.exports.mujoco :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.exports.smplx :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/index.rst ================================================ API Reference ============= This section contains the API documentation for Kimodo, organized by domain. .. toctree:: :maxdepth: 2 :caption: Core Modules model motion_rep constraints exports viz utilities post-processing ================================================ FILE: docs/source/api_reference/model.rst ================================================ Model ===== Core model architecture, diffusion logic, and text encoders. Kimodo Model ------------ .. automodule:: kimodo.model.kimodo_model :members: :undoc-members: :special-members: __call__ Denoiser and Backbone --------------------- .. automodule:: kimodo.model.twostage_denoiser :members: :undoc-members: .. 
automodule:: kimodo.model.backbone :members: :undoc-members: Classifier-Free Guidance ------------------------ .. automodule:: kimodo.model.cfg :members: :undoc-members: Model Loading ------------- .. automodule:: kimodo.model.load_model :members: :undoc-members: Text Encoder ------------ .. automodule:: kimodo.model.text_encoder_api :members: :undoc-members: :special-members: __call__ ================================================ FILE: docs/source/api_reference/motion_rep.rst ================================================ Motion Representation ===================== Motion representation utilities and kinematics helpers. Skeleton -------- .. automodule:: kimodo.skeleton :members: :undoc-members: :show-inheritance: Forward Kinematics ------------------ .. automodule:: kimodo.skeleton.kinematics :members: :undoc-members: :show-inheritance: Motion Representations ---------------------- .. automodule:: kimodo.motion_rep.reps.base :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.motion_rep.reps.kimodo_motionrep :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.motion_rep.reps.tmr_motionrep :members: :undoc-members: :show-inheritance: Utilities --------- .. automodule:: kimodo.motion_rep.feet :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.motion_rep.stats :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.motion_rep.smooth_root :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/post-processing.rst ================================================ Post-Processing Bindings ======================== .. automodule:: kimodo.postprocess :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/utilities.rst ================================================ Utilities ========= General utilities used across the codebase. .. automodule:: kimodo.tools :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.geometry :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.sanitize :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/viz.rst ================================================ Visualization ============= Visualization helpers for rendering skeletons and meshes. .. automodule:: kimodo.viz.g1_rig :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.viz.smplx_skin :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.viz.viser_utils :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/benchmark/introduction.md ================================================ # Benchmark Introduction We provide a benchmark to evaluate text-to-motion and constrained motion generation on a shared test suite. For reproducibility, all test content is stored on disk as folders and files, so anyone can run exactly the same cases. The benchmark test suite is available to download from HuggingFace at [`nvidia/Kimodo-Motion-Gen-Benchmark`](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark) and is currently set up for use with models trained on the [SOMA](https://github.com/NVlabs/SOMA-X) body skeleton. The benchmark contains text prompts, durations, and constraint configurations for a variety of test cases, but **not** the ground-truth motion data itself. 
The ground-truth motions are derived from the [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed), which has its own license that you should review. To construct the full set of benchmark motions, you must download the BONES-SEED dataset separately and run our `create_benchmark` script to populate the test suite with ground-truth motions. Constructing the benchmark with `create_benchmark` is the first step in the full [Evaluation Pipeline](pipeline.md), which is described in detail on the next page.

In addition to the benchmark test cases, we provide code to run generation with Kimodo and compute a variety of [metrics](metrics.md) measuring motion quality, text alignment, and constraint following. While this open-source public test suite is not identical to the one used in the [Kimodo tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf) (Sec. 6.1), the evaluation metrics are the same and the evaluation methodology is similar.

On this page, we describe the overall structure of the test suite and the details of the different test cases. In subsequent pages, we describe how to run the full [evaluation pipeline](pipeline.md), detail the [metrics](metrics.md), and finally provide the [results](results.md) of Kimodo-SOMA-RP and Kimodo-SOMA-SEED on the benchmark.

## Dataset Splits

To evaluate a model on the benchmark, it should be trained with the [provided splits](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark/tree/main/splits) for the [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed). The different splits are defined in:

- `train_split_paths.txt` - filenames of training data
- `test_content_split_paths.txt` - filenames for the test split containing new semantic "content". This split contains motions with a `content_name` (from the BONES-SEED metadata) that is not seen in the training split. This tests model generalization to new semantic motion types, e.g., for text-to-motion generalization.
- `test_repetition_split_paths.txt` - filenames for the test split containing new motions from content that was seen in training. This split contains motions whose `content_name` appears in the training split, but where the exact motion itself was not seen. This tests a model's ability to generalize to novel performances of a familiar motion type, e.g., for constraint-following generalization.

The training split should be used for training, while the two test splits (`content` and `repetition`) are used in the test suite, as described below. Note that the test cases in the benchmark do not cover the entire content and repetition test splits; instead, we strategically sample a subset that maximizes content diversity.

## Test Suite Structure

The full test suite contains 22,474 test cases spanning text- and constraint-conditioned motion generation. The suite is organized hierarchically to logically group test cases, so the evaluation pipeline can be run on a subset of the benchmark rather than the full suite, if desired.
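Because of this organization, it is easy to check what a subtree contains before running the (potentially long) pipeline on it. Below is a minimal Python sketch, assuming a local copy of the test suite at `testsuite/`; it relies on the fact that every test case folder contains a `meta.json`, as described next:

```python
from collections import Counter
from pathlib import Path

root = Path("testsuite")  # local copy of the test suite (path is an assumption)

# Every test case folder contains a meta.json (described below), so counting
# meta.json files counts test cases, grouped here by split/category/type.
counts = Counter(
    "/".join(p.parent.relative_to(root).parts[:3])
    for p in root.rglob("meta.json")
)
for group, n in sorted(counts.items()):
    print(f"{group}: {n} test cases")
```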
After the benchmark has been constructed and motions generated for the model to evaluate, a **test case** is a single folder containing:

- `meta.json` (**required**): text prompt(s) and duration(s),
- `constraints.json` (**optional**): constraints for controlled generation, using the [constraints format](../user_guide/constraints.md),
- `gt_motion.npz` (**optional**): ground-truth/reference motion, using the [NPZ output format](../user_guide/output_formats.md),
- `motion.npz` (**optional**): output of the model given the `meta.json` prompt/duration and optional `constraints.json`, using the same [NPZ output format](../user_guide/output_formats.md).

In addition to being used in the evaluation pipeline, each test case can be:

- loaded in the interactive demo through **Load Example** for visualization,
- loaded in `kimodo_gen` with `--input_folder` for generation from folder-defined inputs.

### Benchmark Folder Hierarchy

The full suite is organized as follows:

```text
testsuite
├── content
│   ├── constraints_notext
│   │   ├── end-effectors
│   │   ├── fullbody
│   │   ├── mixture
│   │   └── root
│   ├── constraints_withtext
│   │   ├── end-effectors
│   │   ├── fullbody
│   │   ├── mixture
│   │   └── root
│   └── text2motion
│       ├── overview
│       ├── timeline_multi
│       └── timeline_single
└── repetition
    ├── constraints_notext
    │   ├── end-effectors
    │   ├── fullbody
    │   ├── mixture
    │   └── root
    ├── constraints_withtext
    │   ├── end-effectors
    │   ├── fullbody
    │   ├── mixture
    │   └── root
    └── text2motion
        ├── overview
        ├── timeline_multi
        └── timeline_single
```

At the highest level, the test suite is organized by the test split used. As discussed previously, `content` refers to the test split with held-out semantic categories of motion, while `repetition` refers to held-out motions from semantic categories seen during training. Within each test split, test cases are organized into:

* `text2motion`: test cases with only text prompts as input (no constraints)
* `constraints_notext`: test cases with only constraints as input (no text prompt)
* `constraints_withtext`: test cases with both prompt and constraints as input

### Text2Motion Test Cases

These test cases are pure text-to-motion with no constraints as input. `text2motion` test cases exclusively use prompts derived from our [SEED timeline annotations](https://huggingface.co/datasets/nvidia/SEED-Timeline-Annotations). This category contains three types of test cases:

* `overview`: medium-detail prompt that describes a full motion. Corresponds to `overview_description` in the [NVIDIA SEED timelines](https://huggingface.co/datasets/nvidia/SEED-Timeline-Annotations) or equivalently `content_natural_desc_4` in the [BONES SEED](https://huggingface.co/datasets/bones-studio/seed) metadata.
* `timeline_single`: fine-grained prompt describing a single segment of a timeline annotation. Corresponds to a single event in a SEED timeline.
* `timeline_multi`: fine-grained prompt describing multiple subsequent segments of a timeline annotation. Corresponds to multiple contiguous events in a SEED timeline, which have been combined into a single natural text description using an LLM.

### Constrained Test Cases

Constrained test cases provide a constraint input either without a text prompt (i.e., `constraints_notext`) or with an `overview` text prompt (i.e., `constraints_withtext`).
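Since a test case is just a folder of standard files, it can be inspected directly. Below is a minimal sketch of loading one (the folder path is hypothetical; any leaf test case works the same way, and `numpy` is the only dependency):

```python
import json
from pathlib import Path

import numpy as np

# Hypothetical leaf folder; any indexed test case in the suite has the same layout.
case = Path("testsuite/content/constraints_withtext/end-effectors/feet_posrot/0000")

meta = json.loads((case / "meta.json").read_text())  # required: prompt(s), duration(s)

constraints_path = case / "constraints.json"         # optional
constraints = json.loads(constraints_path.read_text()) if constraints_path.exists() else None

motion_path = case / "motion.npz"                    # optional: generated motion
if motion_path.exists():
    motion = np.load(motion_path)
    print(list(motion.keys()))                       # e.g., posed_joints, foot_contacts
```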
The different types of constraint categories mirror the [constraint types supported by Kimodo](../key_concepts/constraints.md) and include:

* `fullbody`: constrains all joint positions in the skeleton at specific frames
* `end-effectors`: constrains the positions and rotations of hand and/or foot joints at specific frames
* `root`: constrains the 2D root position and optionally heading on a path or at specific frames
* `mixture`: evaluates compositional control when multiple constraint families are combined

Within each constraint type in the hierarchy are multiple subtypes that vary the constraint sparsity patterns (either in time or in space). So the hierarchy of a constraint folder is:

```text
constraints_XX
├── end-effectors
│   ├── feet_posrot        # feet only constraints
│   ├── hands_feet_posrot  # hands + feet constraints
│   └── hands_posrot       # hands only constraints
├── fullbody
│   ├── inbetweening       # constraints at start and end only
│   └── random             # constraints at random frames
├── mixture
│   ├── root_ee_hands_feet_posrot_fullbody  # mix of (1) root trajectory, (2) hand + foot, and (3) full-body
│   ├── root_ee_hands_posrot                # mix of (1) root keyframe, and (2) hands
│   ├── root_ee_hands_posrot_fullbody       # mix of (1) root keyframe, (2) hands, and (3) full-body
│   └── root_path_fullbody                  # mix of (1) root trajectory, and (2) full-body
└── root
    ├── path_2dpos         # root trajectory position
    ├── path_2dposrot      # root trajectory position + heading
    ├── waypoint_2dpos     # root waypoint position
    └── waypoint_2dposrot  # root waypoint position + heading
```

### Indexed Test Cases in Leaf Folders

Each leaf folder contains indexed test cases (`0000`, `0001`, `0002`, ...). For example:

```text
end-effectors/feet_posrot/
├── 0000/
├── 0001/
├── 0002/
...
└── 0255/
```

Each index folder is one standalone test case with its own `meta.json`, optional `constraints.json`, optional `gt_motion.npz`, and optional `motion.npz`.

================================================
FILE: docs/source/benchmark/metrics.md
================================================

# Metrics

The benchmark evaluates generated motion along three axes:

- **Motion quality** -- foot-skate and contact-consistency metrics,
- **Constraint following** -- position error for root, end-effector, and full-body constraints,
- **Text alignment** -- TMR retrieval and distributional metrics.

Metrics are implemented in `kimodo/metrics/` and orchestrated by `benchmark/evaluate_folder.py`. The protocol is aligned with the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf) (Sec. 6.1, "Evaluation Metrics").

## Evaluation Protocol

The evaluation pipeline runs two passes over each group of test cases:

1. **Generated pass** -- evaluates `motion.npz` with all metrics (foot skate, contact consistency, constraint following) and, when TMR embeddings are available, computes retrieval and FID scores.
2. **Ground-truth pass** -- evaluates `gt_motion.npz` with the same motion-quality and constraint metrics. TMR retrieval metrics are not recomputed in this pass.

Running both passes enables side-by-side comparison: the GT row serves as an empirical upper bound for motion quality, and deviations between GT and generated metrics highlight where the model can improve. See [Evaluation pipeline](pipeline.md) for the full workflow.

## Metrics Reference

The table below lists every key written to `metrics.json`. Detailed descriptions follow in subsequent sections.
| Key | Category | Unit | Direction | | --- | --- | --- | --- | | `foot_skate_from_height` | Motion quality | m/s | Lower is better | | `foot_skate_from_pred_contacts` | Motion quality | m/s | Lower is better | | `foot_skate_max_vel` | Motion quality | m/s | Lower is better | | `foot_skate_ratio` | Motion quality | ratio (0--1) | Lower is better | | `foot_contact_consistency` | Motion quality | ratio (0--1) | Higher is better | | `constraint_root2d_err` | Constraint follow | m | Lower is better | | `constraint_root2d_err_p95` | Constraint follow | m | Lower is better | | `constraint_root2d_acc` | Constraint follow | ratio (0--1) | Higher is better | | `constraint_fullbody_keyframe` | Constraint follow | m | Lower is better | | `constraint_end_effector` | Constraint follow | m | Lower is better | | `TMR/t2m_sim` | Text alignment | score (0--1) | Higher is better | | `TMR/t2m_R/R01` ... `R10` | Text alignment | % | Higher is better | | `TMR/t2m_R/MedR` | Text alignment | rank | Lower is better | | `TMR/FID/gen_text` | Text alignment | distance | Lower is better | | `TMR/FID/gen_gt` | Text alignment | distance | Lower is better | | `TMR/FID/gt_text` | Text alignment | distance | Lower is better | | `TMR/m2m_sim` | Text alignment | score (0--1) | Higher is better | | `TMR/t2m_gt_sim` | Text alignment | score (0--1) | Higher is better | | `TMR/m2m_R/R01` ... `R10` | Text alignment | % | Higher is better | | `TMR/t2m_gt_R/R01` ... `R10` | Text alignment | % | Higher is better | :::{note} Raw metric values are stored in SI units (meters for positions, m/s for velocities). The summary tables printed by `benchmark/parse_folder.py` convert constraint position errors to **cm** and foot-skate velocities to **cm/s** for readability. ::: ### Foot Skating Metrics Foot skating measures how much a foot slides along the ground when it should be in static contact with the ground. Four complementary metrics capture different aspects of this artifact. - **`foot_skate_from_height`** (m/s, lower is better): Mean velocity of the **toe joints** (left toe, right toe) on frames where the toe height is below a floor threshold (`height_thresh = 0.05 m`). This metric does not rely on predicted contact labels -- it uses a geometric criterion (Y-coordinate < threshold) to identify ground-contact frames. - **`foot_skate_from_pred_contacts`** (m/s, lower is better): Mean velocity of all **four foot joints** (left/right heel and toe) on frames where the model predicts contact via the `foot_contacts` output. Unlike `foot_skate_from_height`, this metric trusts the model's own contact predictions and measures all four foot joints rather than toes only. - **`foot_skate_max_vel`** (m/s, lower is better): Maximum velocity across all four foot joints and all time steps where predicted contact is active. This captures worst-case slip spikes that mean-based metrics can hide. - **`foot_skate_ratio`** (ratio 0--1, lower is better): Fraction of ground-contact frames where toe velocity exceeds a threshold (`vel_thresh = 0.2 m/s`). A frame counts as ground contact when the toe is below `height_thresh = 0.05 m` on both the current and the next frame. Inspired by the [GMD](https://github.com/korrawe/guided-motion-diffusion) skating metric. ### Contact Consistency Metric - **`foot_contact_consistency`** (ratio 0--1, higher is better): Agreement between the model's predicted foot contacts and a heuristic contact detector based on joint height and velocity (`vel_thresh = 0.15 m/s`, `height_thresh = 0.10 m`). 
Computed as accuracy (`1 - incorrect_ratio`) over all time steps and four contact channels. A score of 1.0 means perfect agreement between predicted and heuristic contacts. As noted in the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf), this metric provides important context for interpreting the contact-based foot-skate metrics above: if contact consistency is low, `foot_skate_from_pred_contacts` may be unreliable. ### Constraint-Following Metrics Constraint metrics are computed only when the test case includes a `constraints.json` file. The `ContraintFollow` metric class dispatches by [constraint type](../key_concepts/constraints.md): - **`constraint_end_effector`** (m, lower is better): Mean Euclidean distance between target end-effector positions and generated joint positions at the constrained frames. Only position-constrained joints are evaluated (rotation targets are not measured by this metric). - **`constraint_fullbody_keyframe`** (m, lower is better): Mean per-joint Euclidean distance between target and generated full-body joint positions at keyframes. The error is averaged over all joints and all keyframe frames. - **`constraint_root2d_err`** (m, lower is better): Mean 2D Euclidean distance (in the XZ ground plane) between target and generated root positions at constrained frames. - **`constraint_root2d_err_p95`** (m, lower is better): 95th percentile of the per-frame root 2D error across all samples in a group. Computed during aggregation by `evaluate_folder.py` to capture tail-end failures that the mean can mask. - **`constraint_root2d_acc`** (ratio 0--1, higher is better): Fraction of constrained root frames where the 2D position error is within a distance threshold (`root_threshold = 0.10 m`). ### TMR-Based Metrics Text alignment is evaluated using [TMR](https://mathis.petrovich.fr/tmr/) (Text-to-Motion Retrieval), a separate encoder model that maps both text and motion into a shared embedding space. TMR is not used for generation -- it is loaded only for evaluation (see `kimodo/model/tmr.py`). We release a version of TMR retrained on the full Rigplay dataset as [`TMR-SOMA-RP-v1`](https://huggingface.co/nvidia/TMR-SOMA-RP-v1). The original TMR was trained on HumanML3D; our retrained variant uses the same architecture but is trained on the Rigplay motion dataset, SOMA skeleton, and with [LLM2Vec](https://github.com/McGill-NLP/llm2vec) text embeddings. #### Similarity Scores TMR encodes each text prompt and each motion clip into a unit-length embedding vector. Cosine similarity between text and motion embeddings is rescaled to a [0, 1] range: ``` score = cosine_similarity / 2 + 0.5 ``` Three per-test-case similarity scores are recorded: - **`TMR/t2m_sim`** (0--1, higher is better): similarity between the text prompt and the generated motion. - **`TMR/m2m_sim`** (0--1, higher is better): similarity between the generated and ground-truth motions (only when GT is available). - **`TMR/t2m_gt_sim`** (0--1, higher is better): similarity between the text prompt and the GT motion (only when GT is available). #### R-precision (Retrieval Accuracy) R-precision measures whether the correct motion can be retrieved from a pool given its corresponding text query. For each text query in the evaluation group, all motions are ranked by TMR similarity. R@k is the percentage of queries where the correct motion appears in the top k results. 
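For intuition, the sketch below computes R@k and the median rank from N paired, unit-length text and motion embeddings. It ignores the near-duplicate handling described in the note that follows, so it is an illustration rather than the benchmark implementation:

```python
import numpy as np

def retrieval_metrics(text_emb: np.ndarray, motion_emb: np.ndarray) -> dict:
    """R@k and median rank for N paired, unit-length embeddings of shape (N, D)."""
    sim = text_emb @ motion_emb.T                       # (N, N) cosine similarities
    order = np.argsort(-sim, axis=1)                    # motions ranked per text query
    # Rank (1-based) at which each query retrieves its own motion.
    ranks = 1 + np.argmax(order == np.arange(len(sim))[:, None], axis=1)
    metrics = {f"R{k:02d}": 100.0 * float(np.mean(ranks <= k)) for k in (1, 2, 3, 5, 10)}
    metrics["MedR"] = float(np.median(ranks))
    return metrics
```

Note that the `score = cosine_similarity / 2 + 0.5` rescaling above is monotonic, so it has no effect on retrieval ranks.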
Reported keys: `TMR/t2m_R/R01`, `R02`, `R03`, `R05`, `R10` (%), and `TMR/t2m_R/MedR` (median rank, lower is better) correspond to retrieval accuracy when using generated motions. When ground-truth motions are available, analogous retrieval metrics are computed for motion-to-GT-motion (`TMR/m2m_R/...`) and text-to-GT-motion (`TMR/t2m_gt_R/...`). :::{note} Near-duplicate text prompts can artificially penalize retrieval ranking. The evaluation handles this by grouping prompts whose text-text similarity exceeds a threshold of 0.99 and treating any motion in that group as a valid match. ::: #### FID (Frechet Inception Distance) FID measures distributional distance between two sets of TMR embeddings by fitting a multivariate Gaussian to each set and computing the Frechet distance. Three FID variants are reported: - **`TMR/FID/gen_gt`**: distance between generated-motion and GT-motion embeddings (only when GT is available). This is the FID metric that is typically reported in the motion generation literature. - **`TMR/FID/gen_text`**: distance between generated-motion embeddings and text embeddings. - **`TMR/FID/gt_text`**: distance between GT-motion and text embeddings (only when GT is available). Lower values indicate that the two distributions are more similar. FID requires at least 2 samples; groups with fewer samples report `NaN`. #### Per-Test-Case Retrieval In addition to the aggregate metrics above, each test case's `metrics.json` includes a `tmr` block with single motion retrieval results: - `t2m_rank`: the rank of the correct motion when retrieving with this test case's text query. - `top5_retrieved`: the top-5 retrieved motions (sample IDs and text prompts) for inspection. ## JSON Output Format Below is a representative `metrics.json` written by `evaluate_folder.py` for a single test case with mixed constraints (root + end-effector + full-body) and TMR embeddings: ```json { "num_motions": 1, "folder": "...", "per_motion_mean_gen": { "foot_skate_from_height": 0.3144, "foot_skate_from_pred_contacts": 0.0672, "foot_skate_max_vel": 0.2109, "foot_contact_consistency": 0.9522, "foot_skate_ratio": 0.2182, "constraint_end_effector": 0.0286, "constraint_root2d_err": 0.0534, "constraint_root2d_acc": 1.0, "constraint_fullbody_keyframe": 0.0324, "TMR/t2m_sim": 0.8209 }, "per_motion_mean_gt": { "foot_skate_from_height": 0.2361, "foot_skate_from_pred_contacts": 0.0269, "foot_skate_max_vel": 0.1459, "foot_contact_consistency": 1.0, "foot_skate_ratio": 0.1402, "constraint_end_effector": 9.82e-07, "constraint_root2d_err": 0.0407, "constraint_root2d_acc": 1.0, "constraint_fullbody_keyframe": 8.73e-07 }, "tmr": { "t2m_rank": 2, "text": "A person is swiftly performing a dance move by moving their hands and legs.", "top5_retrieved": [ { "id": "0231", "text": "A person is performing dance steps while stepping back and forward..." }, { "id": "0029", "text": "A person is swiftly performing a dance move by moving their hands and legs." 
} ] } }
```

Group-level aggregate JSONs (`.json`) have the same structure but with `num_motions > 1`, averaged per-motion metrics, additional keys like `constraint_root2d_err_p95`, and a `tmr` block containing the aggregate retrieval and FID scores:

```json
{
  "num_motions": 256,
  "folder": "...",
  "per_motion_mean_gen": {
    "foot_skate_from_height": 0.1742,
    "foot_skate_from_pred_contacts": 0.0611,
    "foot_skate_max_vel": 0.3747,
    "foot_contact_consistency": 0.9483,
    "foot_skate_ratio": 0.1499,
    "constraint_end_effector": 0.0367,
    "constraint_root2d_err": 0.0495,
    "constraint_root2d_acc": 0.9212,
    "constraint_fullbody_keyframe": 0.0324,
    "constraint_root2d_err_p95": 0.1115
  },
  "per_motion_mean_gt": {
    "foot_skate_from_height": 0.1617,
    "foot_skate_from_pred_contacts": 0.0235,
    "foot_skate_max_vel": 0.1185,
    "foot_contact_consistency": 1.0,
    "foot_skate_ratio": 0.1214,
    "constraint_end_effector": 1.48e-06,
    "constraint_root2d_err": 0.0376,
    "constraint_root2d_acc": 1.0,
    "constraint_fullbody_keyframe": 1.16e-06,
    "constraint_root2d_err_p95": 0.0602
  },
  "tmr": {
    "TMR/t2m_sim": 0.8742,
    "TMR/t2m_R/R01": 75.39,
    "TMR/t2m_R/R02": 85.55,
    "TMR/t2m_R/R03": 88.28,
    "TMR/t2m_R/R05": 90.23,
    "TMR/t2m_R/R10": 93.36,
    "TMR/t2m_R/MedR": 1.0,
    "TMR/t2m_R/len": 256.0,
    "TMR/FID/gen_text": 0.1442,
    "TMR/m2m_R/R01": 94.53,
    "TMR/m2m_R/R02": 97.66,
    "TMR/m2m_R/R03": 98.05,
    "TMR/m2m_R/R05": 98.83,
    "TMR/m2m_R/R10": 99.22,
    "TMR/m2m_R/MedR": 1.0,
    "TMR/m2m_R/len": 256.0,
    "TMR/t2m_gt_R/R01": 80.47,
    "TMR/t2m_gt_R/R02": 88.28,
    "TMR/t2m_gt_R/R03": 91.02,
    "TMR/t2m_gt_R/R05": 92.58,
    "TMR/t2m_gt_R/R10": 94.53,
    "TMR/t2m_gt_R/MedR": 1.0,
    "TMR/t2m_gt_R/len": 256.0,
    "TMR/FID/gen_gt": 0.0387,
    "TMR/FID/gt_text": 0.1349
  }
}
```

================================================
FILE: docs/source/benchmark/pipeline.md
================================================

# Evaluation Pipeline

This page describes the full benchmark workflow, which uses scripts in the `benchmark` directory:

1. Build the full test suite using ground-truth motions from BONES-SEED BVH data and benchmark metadata (`create_benchmark.py`),
2. Generate motions with a model for all or part of the test suite (`generate_eval.py`),
3. Compute text/motion embeddings with a pre-trained TMR model (`embed_folder.py`),
4. Evaluate metrics over all generated samples (`evaluate_folder.py`),
5. Aggregate and summarize results (`parse_folder.py`).

This pipeline works off-the-shelf for Kimodo models. To evaluate your own model, step (2) will need to be modified to generate with your custom model and output in the expected NPZ format.

## Prerequisite: Download Motion Data and Metadata

The benchmark is constructed from motions in the BONES-SEED dataset and our released metadata. Make sure you have downloaded the [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed) along with the metadata for the test suite from HuggingFace at [`nvidia/Kimodo-Motion-Gen-Benchmark`](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark).

The `testsuite` folder from the downloaded metadata contains the directory structure described in the [benchmark introduction](introduction.md) with `meta.json`, `seed_motion.json`, and `seed_constraints.json` metadata files in the leaf folders. These metadata files contain the text prompts, durations, and constraint definitions for each test case.
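If you prefer to script the download, the metadata repository can be fetched with `huggingface_hub` (a sketch; `create_benchmark.py` in step 1 below can also download the test suite structure automatically if it is not already present locally):

```python
from huggingface_hub import snapshot_download

# Fetches the benchmark metadata (test suite structure and per-case metadata);
# the BONES-SEED motion data must be downloaded separately under its own license.
local_dir = snapshot_download(
    repo_id="nvidia/Kimodo-Motion-Gen-Benchmark",
    repo_type="dataset",
)
print(local_dir)  # path to the downloaded snapshot in the local HF cache
```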
The first two steps of the evaluation pipeline will create the following in the leaf folders to prepare for computing metrics:

- **Ground-Truth Motion** (`gt_motion.npz`): produced by `create_benchmark.py` from SEED BVH + metadata.
- **Constraints Configuration** (`constraints.json`): for test cases with constraint inputs, this file is created by `create_benchmark.py` from SEED BVH + metadata.
- **Generated Motion** (`motion.npz`): produced by the generation step from the model to evaluate (e.g., `generate_eval.py`).

To perform the full evaluation, including metrics for both ground-truth and generated motions (steps 3--5), each leaf folder must contain both `gt_motion.npz` and `motion.npz`.

> Note: all of the following steps will work with a _subset_ of the full test suite, if desired. Anywhere the `testsuite` directory is passed in, it can be replaced with a specific subset such as `testsuite/content/text2motion` to run only this subset of the benchmark.

## 1. Build Full Benchmark (`create_benchmark.py`)

The `create_benchmark.py` script bridges the ground-truth motions and the metadata: it downloads the testsuite structure (if not already present locally), then reads the referenced BVH files from a local copy of BONES-SEED and writes `gt_motion.npz` and `constraints.json` into each sample folder.

```bash
python benchmark/create_benchmark.py path/to/testsuite --dataset datasets/bones-seed/soma_uniform
```

With the default single worker, this construction can take several hours, and the resulting folder is about **26 GB**. To run faster, you can increase the number of parallel workers for processing:

```bash
OMP_NUM_THREADS=2 python benchmark/create_benchmark.py path/to/testsuite --dataset datasets/bones-seed/soma_uniform --workers 16
```

This example runs well on a 32-core system, but you may need to adjust the number of threads per worker and the total number of workers for your system. Generally, a lower number of threads per worker combined with a larger number of workers (up to your available CPU capacity) runs fastest.

Options:

- `--dataset`: path to the local SEED dataset folder (default: `datasets/bones-seed/soma_uniform`).
- `--workers`: number of parallel workers to use for benchmark construction (default: 1, i.e., sequential).
- `--overwrite`: rebuild `gt_motion.npz` even if it already exists.

For each test case, the script:

1. parses the BVH file into local rotation matrices and root translation,
2. subsamples to 30 FPS,
3. converts to the standard T-pose via `SOMASkeleton77.to_standard_tpose`,
4. computes Kimodo motion features and canonicalizes the motion,
5. writes the resulting motion dictionary as `gt_motion.npz`.

For a detailed walkthrough of steps 1--4, see [Loading BONES-SEED BVH data](../user_guide/seed_dataset.md).

## 2. Generate Motions (`generate_eval.py`)

The next step is to generate a motion for each test case. The script `benchmark/generate_eval.py` recursively generates one motion with Kimodo per test case from either the full `testsuite` or a desired subset.

```bash
python benchmark/generate_eval.py \
    --benchmark path/to/testsuite \
    --output generated_folder \
    --model kimodo-soma-rp \
    --batch_size 32 \
    --num_workers 4
```

The batch size and number of data workers should be adjusted for your system. The script is intended to be run with the latest Kimodo-SOMA models (currently v1.1), which are compatible with the benchmark.

> Note: each test case has a seed in `meta.json` that is loaded and used for generation to enable reproducibility.
However, by default, the generation script uses the first seed in a batch to seed the whole batch, so to make results completely repeatable, you must set the batch size to 1 or always use the same batch size when running generation. Useful options: - `--model`: Kimodo model to use for generation. See [available models](../getting_started/quick_start.md#overview-kimodo-models) for the full list. - `--output`: output root directory. The testsuite hierarchy is mirrored here. If omitted, motions are generated **in-place** inside the testsuite folder. - `--overwrite`: regenerate even if `motion.npz` already exists. - `--diffusion_steps`: default denoising steps (can be overridden by each sample `meta.json`). - `--postprocess`: enable post-processing. For fair evaluation, it is recommended to **not** use post-processing so that metrics reflect the raw model output. - `--text_encoder_fp32`: will instantiate the text encoder (if needed) with float32 precision instead of bfloat16. The Kimodo v1.1 models are trained with float32 text encodings, so this slightly improves accuracy but requires extra VRAM. After generation, the output tree mirrors the `testsuite` hierarchy and includes generated motions (`motion.npz`). If the testsuite was built with `create_benchmark.py`, each leaf already has `gt_motion.npz`; the generation step adds `motion.npz` per sample. ```text generated_folder/ └── .../0000/ ├── meta.json ├── constraints.json # present if available in testsuite ├── gt_motion.npz # if built with create_benchmark └── motion.npz # generated ``` ### Using Custom Models The `generate_eval` script is set up to work with Kimodo models, but it can be easily adapted or replaced by generation with a custom model. The only requirement to be able to compute all metrics is to output the `motion.npz` file for each test case that minimally contains: (1) `posed_joints` field with global joint positions on the SOMA 77-joint skeleton and (2) `foot_contacts` field with binary foot contact predictions. Please see the [output formats docs](../user_guide/output_formats.md) for more details on the `NPZ` format. ## 3. Embed with Pre-Trained TMR (`embed_folder.py`) Several evaluation metrics such as R-precision, FID, and latent similarity rely on latent embeddings of both motion and text. For this purpose, we use a [Text-Motion-Retrieval (TMR)](https://mathis.petrovich.fr/tmr/) model trained on the full Bones Rigplay dataset. See [Metrics](metrics.md) for details on the TMR evaluation protocol and metrics. The next step in the eval pipeline is using this TMR model with the `benchmark/embed_folder.py` script to recursively embed each generated motion (`motion.npz`), GT motion (`gt_motion.npz`) when present, and the text prompt from `meta.json`: ```bash python benchmark/embed_folder.py generated_folder --model tmr-soma-rp ``` The default TMR model (`tmr-soma-rp`) trained on the full Rigplay dataset is released as [`TMR-SOMA-RP-v1`](https://huggingface.co/nvidia/TMR-SOMA-RP-v1). It is automatically downloaded from HuggingFace on first use of the embedding script. Options: - `--model`: TMR model to use for encoding (default: `tmr-soma-rp`). - `--device`: compute device (`cuda` or `cpu`). Defaults to `cuda` if available, otherwise `cpu`. - `--overwrite`: re-embed even if embedding files already exist. - `--text_encoder_fp32`: will instantiate the text encoder (if needed) with float32 precision instead of bfloat16. 
The TMR model is trained with float32 text encodings, so this slightly improves accuracy but requires extra VRAM. Running this script saves the embeddings to each test case folder that has the corresponding motion file(s) and `meta.json`: - `motion_embedding.npy` (when `motion.npz` exists) - `gt_motion_embedding.npy` (when `gt_motion.npz` exists) - `text_embedding.npy` > Note: this script can take over 1 hour to run for the full test suite, depending on your GPU. ## 4. Compute Evaluation Metrics (`evaluate_folder.py`) Next, use `benchmark/evaluate_folder.py` to compute per-test-case and aggregated metrics across the test suite (or a specific subset folder). Each leaf folder must contain both `motion.npz` and `gt_motion.npz` to compute the metrics. ```bash python benchmark/evaluate_folder.py generated_folder ``` Options: - `--device`: compute device (`cuda` or `cpu`). Defaults to `cuda` if available, otherwise `cpu`. The script runs two evaluation passes: one on the generated motion (`motion.npz`) and one on the ground-truth motion (`gt_motion.npz`). It outputs: - per test case results: `metrics.json` inside each test case (leaf) folder with metrics summarized for that single test case - per group results: `.json` one level above each group of test-case folders that aggregates metrics over all contained test cases Please see the [Metrics](metrics.md) page for a detailed explanation of these json formats. After embedding and evaluation, the folder structure should look like: ```text generated_folder/ ├── .../0000/ │ ├── motion.npz │ ├── gt_motion.npz │ ├── motion_embedding.npy │ ├── gt_motion_embedding.npy │ ├── text_embedding.npy │ └── metrics.json # single test-case metrics └── .../.json # folder-level aggregate summary of all contained test cases ``` ## 5. Summarize Results of Full Benchmark (`parse_folder.py`) If you have computed metrics for the _entire_ test suite (both `content` and `repetition` splits), use `benchmark/parse_folder.py` to validate all per-test-case result JSONs and aggregate metrics into summary tables. Unlike the previous steps, this script expects the user to pass in the root `testsuite` and for the test suite to follow the standard split/category hierarchy (see [Introduction](introduction.md)): - **Splits**: `content`, `repetition` - **Categories**: `overview`, `timeline_single`, `timeline_multi` (text-following), `constraints_withtext`, `constraints_notext` (constrained generation) ```bash python benchmark/parse_folder.py generated_folder ``` Options: - `--output`: path for the output JSON (default: `/summary_rows.json`). - `--format`: table output format. `terminal` (default) for fixed-width tables, `md` for markdown tables suitable for copy-pasting into documentation. The script: 1. discovers all grouped test case directories (folders containing single test cases with `meta.json`, `motion.npz`, and `gt_motion.npz`), 2. loads each group's `.json` result files written by `evaluate_folder`, 3. computes weighted averages of all metrics by split and category, 4. writes `summary_rows.json` with per-row and per-table aggregated results, 5. prints formatted benchmark tables to the terminal (text-following and constraints, with GT and method rows side by side). Metric values in the tables are converted to user-friendly units (e.g., constraint position errors in cm, foot skating in cm/s). See [Metrics](metrics.md) for definitions of individual metrics. 
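For reference, the per-split and per-category weighted averaging in step 3 boils down to weighting each group's mean by its `num_motions`. Below is a minimal sketch over group-level result dicts (field names follow the JSON format on the [Metrics](metrics.md) page; the actual script additionally handles GT rows and unit conversion):

```python
import numpy as np

def weighted_average(groups: list[dict], key: str) -> float:
    """Average a metric over groups, weighting each group's mean by its size.

    `groups` are group-level result dicts as written by evaluate_folder.py,
    e.g. {"num_motions": 256, "per_motion_mean_gen": {"foot_skate_ratio": 0.15, ...}}.
    """
    weights = np.array([g["num_motions"] for g in groups], dtype=float)
    values = np.array([g["per_motion_mean_gen"][key] for g in groups], dtype=float)
    return float(np.sum(weights * values) / np.sum(weights))
```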
================================================ FILE: docs/source/benchmark/results.md ================================================ # Kimodo Results On this page, we report the results for the latest Kimodo models on the benchmark test suite. These results are reproducible with the [evaluation pipeline](pipeline.md) and should be used when comparing against other models. Note that the reported numbers differ from the numbers in the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf) (Sec. 6) due to differences in skeleton, test suite composition, and evaluation details. To reproduce these results or evaluate your own model, follow the [evaluation pipeline](pipeline.md) and use `parse_folder --format md` to generate summary tables in markdown format. **Note on reproducibility**: to exactly reproduce the results in the tables below, use batch size 1 when generating with Kimodo (i.e., when running `generate_eval.py`). This way, every test case is individually seeded according to `meta.json`. The reported results were computed using LLM2Vec in the default `bfloat16` precision. However, the Kimodo-SOMA-v1.1 and TMR models were actually trained with `float32` embeddings, so if you want to get the best possible performance (and you have enough VRAM), you can include `--text_encoder_fp32` when running the generation and embedding steps, even though the results will not match the tables here. Results are reported on the two splits described in [the introduction](introduction.md#dataset-splits): - **Content**: test cases with novel semantic content not present in training (e.g. unseen action categories). - **Repetition**: content categories seen during training, but specific motion clips are held out and unseen. Note that due to the annotations in Bones Rigplay and SEED datasets, the text prompts in this test split have already been seen during training. For each split, we also report metrics for the ground truth motion. These rows serve as an empirical upper bound for motion quality, and deviations between ground truth and generated metrics highlight where the model can improve. We split results for each model into two tables corresponding to different test cases in the test suite: - **Text-Following**: `overview`, `timeline_single`, and `timeline_multi` - **Constrained**: `constraints_withtext`, `constraints_notext` ## Quantitative Results Results are reported for two models: - **Kimodo-SOMA-SEED-v1.1**: trained on the public [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) dataset. The results are comparable to any model trained on SEED that uses our recommended splits [described in the introduction](introduction.md#dataset-splits). - **Kimodo-SOMA-RP-v1.1**: trained on the full (proprietary) Bones Rigplay dataset which is a superset of BONES-SEED. Though the training split is larger, the model is not trained on the SEED test splits to ensure a fair comparison. 
### Text-Following Evaluation | | Overview R@3↑ | Overview FID↓ | Overview Skate↓ | Overview Contact↑ | Timeline single R@3↑ | Timeline single FID↓ | Timeline single Skate↓ | Timeline single Contact↑ | Timeline multi R@3↑ | Timeline multi FID↓ | Timeline multi Skate↓ | Timeline multi Contact↑ | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | | **Content** Ground Truth | 89.09 | 0.000 | 1.849 | 1.000 | 86.26 | 0.000 | 1.789 | 1.000 | 88.47 | 0.000 | 1.711 | 1.000 | | **Content** Kimodo-SOMA-SEED-v1.1 | 81.13 | 0.035 | 4.077 | 0.977 | 73.17 | 0.028 | 3.873 | 0.980 | 80.10 | 0.032 | 3.685 | 0.981 | | **Content** Kimodo-SOMA-RP-v1.1 | 83.32 | 0.025 | 3.641 | 0.982 | 78.08 | 0.026 | 3.523 | 0.984 | 84.79 | 0.028 | 3.278 | 0.985 | | **Repetition** Ground Truth | 93.91 | 0.000 | 2.106 | 1.000 | 90.13 | 0.000 | 2.037 | 1.000 | 94.49 | 0.000 | 1.931 | 1.000 | | **Repetition** Kimodo-SOMA-SEED-v1.1 | 90.92 | 0.004 | 4.573 | 0.972 | 80.38 | 0.007 | 4.442 | 0.976 | 92.58 | 0.006 | 4.199 | 0.974 | | **Repetition** Kimodo-SOMA-RP-v1.1 | 87.90 | 0.008 | 4.103 | 0.977 | 77.02 | 0.011 | 3.938 | 0.981 | 88.59 | 0.009 | 3.727 | 0.980 | ### Constrained Evaluation | | With text FB Pos↓ | With text EE Pos↓ | With text EE Rot↓ | With text 2D Root↓ | With text Pelvis@95% | Without text FB Pos↓ | Without text EE Pos↓ | Without text EE Rot↓ | Without text 2D Root↓ | Without text Pelvis@95% | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | | **Content** Ground Truth | 0.000 | 0.000 | - | 3.837 | 5.36 | 0.000 | 0.000 | - | 3.913 | 5.41 | | **Content** Kimodo-SOMA-SEED-v1.1 | 3.421 | 3.817 | - | 4.979 | 9.14 | 3.320 | 3.664 | - | 4.797 | 9.03 | | **Content** Kimodo-SOMA-RP-v1.1 | 2.929 | 3.029 | - | 4.581 | 7.77 | 2.935 | 2.994 | - | 4.411 | 7.37 | | **Repetition** Ground Truth | 0.000 | 0.000 | - | 3.607 | 5.44 | 0.000 | 0.000 | - | 3.567 | 5.42 | | **Repetition** Kimodo-SOMA-SEED-v1.1 | 3.187 | 3.852 | - | 4.734 | 9.19 | 3.120 | 3.510 | - | 4.264 | 7.89 | | **Repetition** Kimodo-SOMA-RP-v1.1 | 2.804 | 2.983 | - | 4.260 | 7.63 | 2.829 | 2.969 | - | 4.027 | 7.21 | ================================================ FILE: docs/source/conf.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 import os import sys # -- Path setup -------------------------------------------------------------- sys.path.insert(0, os.path.abspath("../..")) # -- Project information ----------------------------------------------------- project = "Kimodo" copyright = "2026, NVIDIA" author = "NVIDIA" version = "" release = "" # -- General configuration --------------------------------------------------- extensions = [ "sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx.ext.viewcode", "sphinx.ext.intersphinx", "sphinx.ext.autosummary", "sphinx.ext.githubpages", "sphinx_copybutton", "myst_parser", "sphinx_design", ] napoleon_google_docstring = True napoleon_numpy_docstring = False napoleon_include_init_with_doc = True napoleon_use_param = True napoleon_use_rtype = True autodoc_default_options = { "members": True, "member-order": "bysource", "special-members": "__init__", "undoc-members": True, "exclude-members": "__weakref__", "show-inheritance": False, } autodoc_typehints = "none" autosummary_generate = True # Avoid initialization issues for optional native libs os.environ.setdefault("MUJOCO_GL", "osmesa") os.environ.setdefault("PYOPENGL_PLATFORM", "osmesa") class Mock: """Mock class for imports that can't be satisfied.""" def __init__(self, *args, **kwargs): pass def __call__(self, *args, **kwargs): return Mock() def __getattr__(self, name): if name in ("__file__", "__path__"): return "/dev/null" if name == "__version__": # Some libraries (e.g. safetensors) parse torch.__version__ with # packaging.version.Version, so this must be a valid PEP 440 string. return "0.0.0" if name == "__signature__": return None if name == "__mro_entries__": return lambda bases: () return Mock() def __getitem__(self, name): return Mock() def __iter__(self): return iter([]) def __or__(self, other): return Mock() def __ror__(self, other): return Mock() mock_modules = [ "torch", "torch.nn", "torch.nn.functional", "torch.optim", "torch.distributed", "torch.cuda", "torch.utils", "torch.utils.data", "lightning", "lightning.fabric", "lightning_fabric", "pytorch_lightning", "tensordict", "pydantic", "pydantic.dataclasses", "pydantic_core", "mujoco", "isaacgym", "isaacgymenvs", "genesis", "omni", "wandb", "hydra", "omegaconf", "tqdm", "trimesh", "pyvista", "smplx", "smpl", "scipy", "scipy.spatial", "scipy.spatial.transform", "peft", "transformers", "safetensors", "safetensors.torch", "sklearn", "PIL", "cv2", "rich", "rich.progress", "skimage", "imageio", "openmesh", "gym", "easydict", "dm_control", "dm_control.mjcf", "dm_control.mujoco", "matplotlib", "matplotlib.pyplot", ] for mod in mock_modules: sys.modules[mod] = Mock() autodoc_mock_imports = mock_modules templates_path = ["_templates"] exclude_patterns = ["api_reference/_generated/**"] language = "en" source_suffix = { ".rst": "restructuredtext", ".md": "markdown", } master_doc = "index" # -- Options for HTML output ------------------------------------------------- html_theme = "nvidia_sphinx_theme" html_static_path = ["_static"] html_css_files = ["custom.css"] html_logo = "_static/logo-placeholder.svg" html_show_sourcelink = False html_theme_options = { "collapse_navigation": False, "navigation_depth": 4, } toc_object_entries_show_parents = "hide" htmlhelp_basename = "Kimododoc" # -- Options for intersphinx ------------------------------------------------- intersphinx_mapping = { "python": ("https://docs.python.org/3", None), "torch": ("https://pytorch.org/docs/stable/", None), "numpy": ("https://numpy.org/doc/stable/", None), } 
copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " copybutton_prompt_is_regexp = True # Generate heading anchors so cross-doc links like path.md#fragment resolve (local ids). myst_heading_anchors = 4 # Required so `:::{dropdown}` and other fenced directives in .md files are parsed (not shown as plain text). myst_enable_extensions = ["colon_fence"] def setup(app): app.add_css_file("custom.css") ================================================ FILE: docs/source/getting_started/installation.md ================================================ # Installation > Note: This project will download and install additional third-party open source software projects. Review the license terms of these open source projects before use. > Note: This repo was developed and primarily tested on Linux There are two ways to install Kimodo: (1) as a package, or (2) from the source code. Both require setting up a Hugging Face token to use the text encoder at generation time. ## Set Up Hugging Face Token The Kimodo text encoder relies on the **gated** `meta-llama/Meta-Llama-3-8B-Instruct` model, which requires: - Your HF account has been granted access to the [model page](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct). - You provide an HF token at runtime After receiving access to the Llama repo, please create an access token [here](https://huggingface.co/settings/tokens/new?tokenType=read). Then use it to log in on your command line: ```bash hf auth login ``` or alternatively, paste the token into the file ``~/.cache/huggingface/token``. If you don't have `hf` installed, you will first need to run `pip install --upgrade huggingface_hub`. ## Kimodo Install Option 1: Package Install The easiest way to get started is simply installing Kimodo as a package without needing to clone the codebase. This will allow you to generate motions and run the demo as a black box. We suggest creating a new Python environment for the install, for example with `venv` or conda: ```bash conda create -n kimodo python=3.10 conda activate kimodo ``` To ensure you have a version of [PyTorch](https://pytorch.org/get-started/locally/) that is compatible with your system and CUDA version, it is recommended to manually install the best version of PyTorch for you before installing Kimodo. Anything over PyTorch 2.0 is sufficient. We strongly suggest using a GPU-capable version of PyTorch to generate motions in a reasonable amount of time. Installing the base Kimodo package will allow you to generate motions from the command line: ```bash pip install git+https://github.com/nv-tlabs/kimodo.git ``` If you want to be able to run the interactive demo as well, use this command, which installs additional dependencies: ```bash pip install "kimodo[all] @ git+https://github.com/nv-tlabs/kimodo.git" ``` You should now be ready to use Kimodo. Check out the [quick start guide](quick_start.md) to see how to generate motions. If you experience issues with package or system compatibility using the above install strategy, we recommend downloading the codebase and using the Docker install detailed below. ## Kimodo Install Option 2: Source Code Install If you plan to build on Kimodo or dig into the codebase, you'll want to clone and install the repo. ### Clone Kimodo Repository ```bash git clone https://github.com/nv-tlabs/kimodo.git cd kimodo ``` ### Choose Your Installation Route Kimodo can be installed by building and running through a virtual environment (e.g., `conda`) or within a Docker container.
```{toctree} :maxdepth: 1 installation_virtual_env installation_docker installation_smpl ``` ================================================ FILE: docs/source/getting_started/installation_docker.md ================================================ # Installation With Docker > Note: the first time building and running with Docker can take several minutes; please be patient. ## Clone Modified Viser Library The interactive demo relies on [a fork of Viser](https://github.com/nv-tlabs/kimodo-viser) that implements a timeline interface and more. Clone it within the `kimodo` directory before building with Docker using: ```bash git clone https://github.com/nv-tlabs/kimodo-viser.git ``` ## Quick Install Before running Docker, make sure your Hugging Face token is available at `~/.cache/huggingface/token` on the host, for example by running `hf auth login` once outside the container (see the [Installation](installation.md) instructions). The easiest way to build and immediately run the interactive demo webapp (with the text-encoder service) in one command is: ```bash docker compose up -d --build ``` ## Step-by-Step Installation Alternatively, you can first build with: ```bash docker compose build ``` This builds the `text-encoder` and `demo` images, corresponding to the text encoding service and the interactive motion authoring webapp, respectively. Please see the [quick start guide](quick_start.md) for more information on these.
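If you only need one of the two images, Compose can also build a single service by name (the service names come from `docker-compose.yaml`):

```bash
# Build just the demo image (or: docker compose build text-encoder)
docker compose build demo
```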
:::{dropdown} Advanced Configuration of Dependencies
This repo uses: - `docker_requirements.in`: human-maintained, top-level dependencies - `docker_requirements.txt`: pinned lockfile (automatically generated) Notes: - We keep a lockfile for **reproducible Docker builds** (so a rebuild next week pulls the same deps). - The lockfile intentionally **omits `torch`/CUDA wheels** because the Docker base image (`nvcr.io/nvidia/pytorch`) already provides a tested PyTorch build (avoids slow installs and CUDA mismatches).
:::
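If you edit `docker_requirements.in` and need to refresh the lockfile, a conventional pip-tools invocation would look like the sketch below. This is an illustration only: the repo ships its own locking helper (`kimodo/scripts/lock_requirements.py`), which may apply extra filtering such as the `torch`/CUDA exclusions noted above.

```bash
# Illustrative pip-tools workflow (not necessarily the project's exact one):
pip install pip-tools
pip-compile docker_requirements.in --output-file docker_requirements.txt
```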

After building, you will need to manually start the text-encoder service before doing any motion generation: ```bash docker compose up text-encoder ``` Note: the first time you run this command, it will take a while as the Llama-based text encoder is downloaded. Finally, to start the interactive demo: ```bash docker compose up demo ``` For more information on using the Docker setup, see the [Quick Start](quick_start.md) guide next. ================================================ FILE: docs/source/getting_started/installation_smpl.md ================================================ # Using Kimodo-SMPLX Model Using the [Kimodo-SMPLX-RP-v1](https://huggingface.co/nvidia/Kimodo-SMPLX-RP-v1) model requires a few extra installation steps. ## Request Model Access The SMPL-X version of Kimodo is gated, so before trying to generate motions with it in the CLI or demo, go to the [Hugging Face model page](https://huggingface.co/nvidia/Kimodo-SMPLX-RP-v1) and request access. As described in the [installation](./installation.md) process, make sure your HF token is properly set up so your access to the model can be authenticated. ## Download SMPL-X Body Model If you want to visualize generated SMPL-X motions in the demo, you will need to download the SMPL-X body model. Go to the [SMPL-X](https://smpl-x.is.tue.mpg.de/) webpage, sign in or create an account, and go to the "Download" page. Click "Download SMPL-X with removed head bun (NPZ)" and then copy the `SMPLX_NEUTRAL.npz` file into the Kimodo codebase at `kimodo/kimodo/assets/skeletons/smplx22/SMPLX_NEUTRAL.npz`. Note that if you installed Kimodo as a package without downloading the codebase, you'll need to find where the assets directory is located by running: ```bash python -c "from kimodo.assets import skeleton_asset_path; print(skeleton_asset_path('smplx22'))" ``` ================================================ FILE: docs/source/getting_started/installation_virtual_env.md ================================================ # Installation With Virtual Environment > Note: the repo was tested with Python 3.10+ and PyTorch 2.0+. ## Create Environment We recommend setting up a separate virtual environment for Kimodo to avoid dependency conflicts. ### Using venv ```bash python -m venv venv source venv/bin/activate ``` ### Using Conda ```bash conda create -n kimodo python=3.10 conda activate kimodo ``` ## Install Dependencies ### Install PyTorch First, make sure to install a version of [PyTorch](https://pytorch.org/get-started/locally/) that works with your system and CUDA version. We suggest anything over PyTorch 2.0. We strongly suggest using a GPU-capable version of PyTorch to generate motions in a reasonable amount of time. ### (Optional) Clone Modified Viser Library The interactive demo relies on [a fork of Viser](https://github.com/nv-tlabs/kimodo-viser) that implements a timeline interface and more. If you want to have an editable install of this version of Viser (i.e., you expect to modify it), clone and install it within the `kimodo` directory using: ```bash git clone https://github.com/nv-tlabs/kimodo-viser.git pip install -e kimodo-viser ``` ### Install Kimodo Next, to install Kimodo, run this command from the base of the repo: ```bash pip install -e . ``` This results in a single editable install for Kimodo and the MotionCorrection package.
If you plan to use the demo, you can instead run: ```bash pip install -e ".[all]" ``` This will install our [Viser fork](https://github.com/nv-tlabs/kimodo-viser) (if not already installed in the previous step) and the [SOMA body model](https://github.com/NVlabs/SOMA-X). Next, head over to the [Quick Start](quick_start.md) page to test out your installation by generating some motions. ================================================ FILE: docs/source/getting_started/quick_start.md ================================================ # Quick Start This page provides a quick introduction to motion generation with Kimodo. For detailed explanations, we recommend reviewing the full documentation pages linked in each section. Before running these commands, follow the [installation guide](installation.md) to install Kimodo in a virtual environment or using Docker. ## Overview: Kimodo Models Motion generation can be performed with several trained Kimodo models that vary by skeleton and training dataset. > Note: models will be downloaded automatically when attempting to generate from the CLI or Interactive Demo, so there is no need to download them manually | Model | Skeleton | Training Data | Release Date | Hugging Face | License | |-------|------|------|-------------|-------------|----| | **Kimodo-SOMA-RP-v1.1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | April 10, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-RP-v1.1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) | | **Kimodo-SOMA-SEED-v1.1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) | April 10, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-SEED-v1.1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) | | **Kimodo-SOMA-RP-v1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-RP-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) | | **Kimodo-G1-RP-v1** | [Unitree G1](https://github.com/unitreerobotics/unitree_mujoco/tree/main/unitree_robots/g1) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-G1-RP-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) | | **Kimodo-SOMA-SEED-v1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-SEED-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) | | **Kimodo-G1-SEED-v1** | [Unitree G1](https://github.com/unitreerobotics/unitree_mujoco/tree/main/unitree_robots/g1) | [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-G1-SEED-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) | | **Kimodo-SMPLX-RP-v1** | [SMPL-X](https://github.com/vchoutas/smplx) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SMPLX-RP-v1) | [NVIDIA R&D 
Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-internal-scientific-research-and-development-model-license/) | By default, we recommend using the models trained on the full Bones Rigplay dataset (700 hours of mocap) for your motion generation needs. The models trained on BONES-SEED use 288 hours of [publicly available mocap data](https://huggingface.co/datasets/bones-studio/seed) and so are less capable, but are useful for comparing your own trained models on the same dataset. See the [benchmark](../benchmark/introduction.md) for a standardized evaluation suite on BONES-SEED. ### Recommended Hardware Kimodo requires ~17GB of VRAM to generate locally entirely on GPU, due primarily to the size of the text embedding model. If you have a smaller card, set `TEXT_ENCODER_DEVICE=cpu` when running Kimodo commands to force text encoding to the CPU. This is slightly slower but reduces VRAM usage to <3 GB. The model has been most extensively tested on GeForce RTX 3090, GeForce RTX 4090, and NVIDIA A100 GPUs, but it should work on other recent cards with sufficient VRAM. ## Run Text-Encoder Service Motion generation relies on embedding the input text prompt, which becomes the input to Kimodo. Although it is fine to run the CLI commands and demo on their own, it is often preferable to start the _text encoder service_ in the background, which can be shared across all motion generation requests. This is much more efficient when making many consecutive CLI calls, as it avoids needing to instantiate the large text encoder every time. To start the text encoder service: ```bash kimodo_textencoder ``` The first run of the service will take a while as it downloads the embedding model. We recommend running this in the background or in a separate terminal where it will stay open and usable by other scripts. If you are using the Docker setup, the service can alternatively be started in the container with: ```bash docker compose up text-encoder ``` > Note: when the text encoder is initialized, the transformers library will report several unexpected and missing layers for LLM2Vec. These are expected and can be safely ignored. If you are running on a GPU with <16 GB VRAM, you can force the text encoder to the CPU, for example: ```bash TEXT_ENCODER_DEVICE=cpu kimodo_textencoder ``` ## Command-Line Text-to-Motion Generation **[CLI Documentation](../user_guide/cli.md)** You can generate motions from the command line using the generate script: ```bash kimodo_gen "A person walks forward." \ --model Kimodo-SOMA-RP-v1 \ --duration 5.0 \ --output output ``` The `--model` argument corresponds to the model name in the table above. The output motion will be saved using the stem name given by `--output` in the Kimodo [output format](../user_guide/output_formats.md). For a detailed description of all generation arguments, including how to generate motion with constraints, see the full [CLI documentation](../user_guide/cli.md). If you set up Kimodo with Docker, you can instead run generation inside the Docker container, replacing `kimodo_gen XXX` with `docker compose run --rm demo kimodo_gen XXX`. If you will be running generation multiple times, it is better to start the `demo` container (e.g., in another terminal or in the background), and then run commands inside it with `docker compose exec demo kimodo_gen XXX`.
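If you are VRAM-constrained and have not started the text-encoder service, the same environment variable shown above also applies to one-off generation commands (the prompt and output name here are just placeholders):

```bash
# Force text encoding onto the CPU for a single generation call
TEXT_ENCODER_DEVICE=cpu kimodo_gen "A person waves." --duration 3.0 --output wave
```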
## Interactive Motion Authoring Demo **[Demo Documentation](../interactive_demo/index.md)** The demo makes it easy to generate motions through an intuitive control interface for text prompting and constraints. The demo can be started with: ```bash kimodo_demo ``` The demo is a webapp that will run on [http://localhost:7860](http://localhost:7860). Open this URL in your browser to access the interface. If you are using Docker, the demo can be launched with: ```bash docker compose up demo ``` or if you want to start the demo and the text encoder service (explained above) at the same time, use: ```bash docker compose up ```
:::{dropdown} Additional Tips for Docker
You may find the following commands useful if running Kimodo within the Docker containers. In the example commands below, you can also replace `demo` by `text-encoder`: **Check logs:** ```bash docker compose logs demo ``` **Stop service:** ```bash docker compose stop demo ``` **Restart service:** ```bash docker compose restart demo ``` **Stop and remove everything:** ```bash docker compose down ```
:::
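To recap this page, one possible end-to-end session (outside Docker) looks like the sketch below; run the encoder in a separate terminal or in the background so it stays available across commands:

```bash
# Terminal 1: start the shared text-encoder service
kimodo_textencoder

# Terminal 2: generate a motion, then launch the interactive demo
kimodo_gen "A person walks forward." --model Kimodo-SOMA-RP-v1 --duration 5.0 --output output
kimodo_demo   # then open http://localhost:7860 in your browser
```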
================================================ FILE: docs/source/index.md ================================================ # Kimodo Documentation
*Scaling controllable human motion generation*
## Overview Kimodo is a **ki**nematic **mo**tion **d**iffusi**o**n model trained on a large-scale (700 hours) commercially-friendly optical motion capture dataset. The model generates high-quality 3D human and robot motions, and is controlled through text prompts and an extensive set of constraints such as full-body pose keyframes, end-effector positions/rotations, 2D paths, and 2D waypoints. See the [project page](https://research.nvidia.com/labs/sil/projects/kimodo/) for details. ## Highlights

- **Controlled Generation**: Text prompts combined with full-body, root, and end-effector constraints.
- **Human(oid) Support**: Model variations for both digital humans and humanoid robots.
- **Interactive Demo**: Timeline editing, real-time 3D visualization, and example presets.

## Quick links - [Installation](getting_started/installation.md) - [Quick Start](getting_started/quick_start.md) - [Command Line Interface](user_guide/cli.md) - [Interactive Demo](interactive_demo/index.md) - [Project Structure](project_structure.md) ```{toctree} :maxdepth: 3 :caption: Getting Started :hidden: getting_started/installation getting_started/quick_start ``` ```{toctree} :maxdepth: 2 :caption: User Guide :hidden: interactive_demo/index user_guide/cli user_guide/constraints user_guide/output_formats user_guide/motion_convert user_guide/seed_dataset user_guide/configuration ``` ```{toctree} :maxdepth: 2 :caption: Key Concepts :hidden: key_concepts/model key_concepts/limitations key_concepts/motion_representation key_concepts/constraints key_concepts/skeleton ``` ```{toctree} :maxdepth: 2 :caption: Benchmark :hidden: benchmark/introduction benchmark/pipeline benchmark/metrics benchmark/results ``` ```{toctree} :maxdepth: 2 :caption: Reference :hidden: project_structure project_info api_reference/index ``` ================================================ FILE: docs/source/interactive_demo/constraints.md ================================================ # Constraints Constraints guide the motion at specific frames or intervals. To learn about the types of constraints and the details of each, see the [constraints concepts](../key_concepts/constraints.md) and [constraints format](../user_guide/constraints.md) pages. ![Constraints panel](../_static/demo/constraints_panel.png) ![Editing mode](../_static/demo/editing_mode.png) The constraint panel allows you to configure constraints and constraint editing: - **Enter Editing Mode**: enable FK pose editing in the viewer. Gizmos will be displayed on joints that can be edited. If there is already a constraint on the timeline for the current frame, any pose editing will adjust that constraint; otherwise, you need to add a constraint on the timeline after adjusting the pose. - **Gizmo space**: whether to display the rotation gizmos in local or global joint space while editing - **Snap to Constraint**: will snap the current frame of motion to the constraint at that frame. This can be useful if a generated pose does not exactly meet the constraint and you want to continue editing the constraint. - **Reset Constraint**: does the opposite by snapping the pose back to the original generated motion from the constrained pose. - **Root 2D Options > Make Smooth Path**: if you have laid down root waypoint constraints, checking this box will turn the waypoints into a smoothed dense path constraint. If there is not a waypoint at the first and last frames of the motion, they will be automatically added, since Kimodo is only trained on full-sequence paths. - **Clear All Constraints**: clears all current constraints from the viewer and timeline. ================================================ FILE: docs/source/interactive_demo/examples.md ================================================ # Examples The Examples Tab within the settings panel contains several examples that highlight the key capabilities and potential workflows with Kimodo. Examples are included for the `Kimodo-SOMA-RP` and `Kimodo-G1-RP` models. ![Examples panel](../_static/demo/examples_panel.png) After choosing an example from the dropdown menu, click "Load Example" to load the example configuration into the viewer. The viewer will display the pre-generated motion along with the prompts and constraints on the timeline that were used to generate it.
All settings used to generate the motion are also loaded with the example (e.g., seed, classifier-free guidance settings), so you should be able to click "Generate" in the panel to recover the same result. Examples cover a variety of ways to use one or more text prompts along with kinematic constraints for generation. **Saving New Examples**: after you've generated a motion, you can save a new Example under the "Load/Save" tab of the Settings panel. You should immediately see the Examples dropdown update with your new saved example so it can be loaded in later. The examples walk through common workflows in the webapp; each workflow has its own section and an accompanying video. ================================================ FILE: docs/source/interactive_demo/export_results.md ================================================ # Saving/Loading The Load/Save and Exports panels allow saving generated results and loading previously generated results. ![Export panel](../_static/demo/exports_panel.png) - **Load/Save** - **Motion**: save the current motion in the [NPZ format](../user_guide/output_formats.md#kimodo-npz-format) to a specific path. Motion NPZs can also be loaded into the viewer from this panel. This is useful to load in motions generated with the CLI. - **Constraints**: save the current constraints in the [JSON format](../user_guide/constraints.md) to a specific path. Constraint JSON files can also be loaded into the viewer. - **Example**: allows saving a new example that encompasses the current motion, constraints, and all settings. This is useful for reloading previous work. If examples are saved to the demo examples directory, they will be loadable from the Examples dropdown menu; otherwise, you can load them through a file path in this menu. - **Exports** - **Screenshot**: save the current canvas as an image that can be downloaded through your browser - **Video**: record the current motion to a video that can be downloaded through your browser - **Motion**: save the current motion to a format of your choice depending on the loaded skeleton: - SOMA: `NPZ` or `BVH` - G1: `NPZ` or `CSV` - SMPL-X: `NPZ` or `AMASS NPZ` These formats are described in [output formats](../user_guide/output_formats.md). ================================================ FILE: docs/source/interactive_demo/generation.md ================================================ # Generation The most important panel is the "Generate" panel, which allows you to call Kimodo to generate one or more motions based on the prompts, constraints, and settings provided. ![Generate panel](../_static/demo/generate_panel.png) - **Num Samples**: the number of motions to generate based on the current settings. When multiple samples are generated, you _must_ choose a single sample by clicking the character in the viewer before editing constraints or generating new motion. - **SOMA Layer**: if using a `Kimodo-SOMA` model, this option will appear. It allows you to use the SOMA body layer to skin the character instead of using the SOMA rig. For details on the difference between the two, see the [Skeletons page](../key_concepts/skeleton.md#soma-default).
- **Seed**: random seed for repeatable generation - **Denoising steps**: number of steps to use with DDIM - **CFG Text/Constraint Weight**: the weights to use for classifier-free guidance - **Post-Processing**: whether to use foot skate cleanup and constraint post-optimization to improve motion after generation - **Root Margin**: if the skeleton root deviates more than this margin from a constraint, the post-processing will fix it ================================================ FILE: docs/source/interactive_demo/index.md ================================================ # Interactive Demo The web-based interactive demo provides an intuitive interface for generating motions with any of the Kimodo model variations. ![Demo Interface](../_static/overview.png) *Interactive demo interface built with [Viser](https://github.com/viser-project/viser)* ```{note} To see the demo in action, follow the [setup instructions](launching.md) below and launch it locally. After launching, open the demo in a web browser at http://127.0.0.1:7860 or use port forwarding if running on a server. ``` The demo provides a timeline-based interface for composing text prompts and constraints, with real-time 3D visualization. Here are some key features: - **Multiple Characters**: Supports generating with the SOMA, G1, and SMPL-X versions of Kimodo - **Text Prompts**: Enter one or more natural language descriptions of desired motions on the timeline - **Timeline Editor**: Add and edit keyframes and constrained intervals on multiple constraint tracks - **Constraint Types**: - Full-Body: Complete joint position constraints at specific frames - 2D Root: Define waypoints or full paths to follow on the ground plane - End-Effectors: Control hands and feet positions/rotations - **Constraint Editing**: Editing mode allows for re-posing of constraints or adjusting waypoints - **3D Visualization**: Real-time rendering of generated motions with skeleton and skinned mesh options - **Playback Controls**: Preview generated motions with adjustable playback speed - **Multiple Samples**: Generate and compare multiple motion variations - **Examples**: Load pre-existing examples to better understand Kimodo's capabilities - **Export**: Save constraints and generated motions for later use ## Quick Links - [Starting the Demo](launching.md) - [UI Overview](ui_overview.md) - [Examples](examples.md) ```{toctree} :maxdepth: 2 :hidden: launching ui_overview model_selection examples generation constraints export_results ``` ================================================ FILE: docs/source/interactive_demo/launching.md ================================================ # Running the Demo After following the installation [instructions](../getting_started/installation.md), the demo can be launched with the commands below. The demo runs in the web browser at [http://localhost:7860](http://localhost:7860).
If you run the demo on a remote server, set up SSH port forwarding so your local web browser can reach `http://localhost:7860` as if it were local. **Option 1: Add LocalForward to your SSH config** Edit (or create) your SSH config file (typically `~/.ssh/config`): ``` Host your-server-name HostName your.server.address User username LocalForward 7860 localhost:7860 ``` Then connect with: ``` ssh your-server-name ``` **Option 2: Use the SSH command-line directly** From your local machine, run: ``` ssh -N -L 7860:localhost:7860 username@your.server.address ``` This will forward your local port 7860 to the server's port 7860. After connecting, open [`http://localhost:7860`](http://localhost:7860) in your web browser. Replace `username` and `your.server.address` with your actual user and server info.
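Once connected, you can sanity-check the tunnel from your local machine before opening the browser (this assumes the demo is already running on the server):

```bash
# Should print HTTP response headers if the tunnel and the demo are both up
curl -sI http://localhost:7860
```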

If you will be restarting the demo frequently, we recommend first starting the text encoder service in the background, as detailed in the [quick start guide](../getting_started/quick_start.md#run-text-encoder-service). If the text encoder service is not running, the demo will automatically load the text encoder model. The demo will also automatically download the Kimodo model checkpoint on launch and whenever the selected model is changed in the UI. ## Launch from Command Line If you installed Kimodo as a package or from source, the demo can be started with: ```bash kimodo_demo ``` ## Launch with Docker If you installed with Docker, you can start the demo with: ```bash docker compose up demo ```
:::{dropdown} Additional Tips for Docker
You may find the following commands useful if running Kimodo within the Docker containers. In the example commands below, you can also replace `demo` by `text-encoder`: **Check logs:** ```bash docker compose logs demo ``` **Stop service:** ```bash docker compose stop demo ``` **Restart service:** ```bash docker compose restart demo ``` **Stop and remove everything:** ```bash docker compose down ```
:::
================================================ FILE: docs/source/interactive_demo/model_selection.md ================================================ # Model Selection Model selection allows choosing between the Kimodo models detailed in the [quick start guide](../getting_started/quick_start.md#overview-kimodo-models). The models determine which character is loaded in the scene and the possible export options. - **SOMA**: default human skeleton - **G1**: MuJoCo-compatible exports - **SMPL-X**: SMPL-X compatible outputs For details on each skeleton, see [Skeletons](../key_concepts/skeleton.md). *Model selection UI* ![Skeleton overview](../_static/skeletons/skeletons.png) ================================================ FILE: docs/source/interactive_demo/ui_overview.md ================================================ # UI Overview This page gives an overview of each of the main elements of the demo UI and how to use them. ![Demo Interface](../_static/overview.png) *An example scene within the demo webapp* ## Viewer ![Viewer](../_static/demo/viewer.png) The 3D viewer shows the currently generated motion. It supports skeleton or skinned mesh rendering, which is configurable in the "Visualize" panel. ### Camera - **Left-drag**: rotate - **Right-drag**: pan - **Scroll**: zoom ### Playback - **Space** to play/pause - **←/→** to step frames, or click the frame number. ## Timeline ![Timeline](../_static/demo/timeline.png) The timeline is where you: - add, edit, and delete **prompt segments** - add and delete **constraints** at frames or intervals and adjust timing ### Timeline Navigation - **Scroll Up/Down** in the timeline: move left/right - **Shift + Scroll** in the timeline: zoom in/out ### Prompts - **Double-Click** a text prompt to edit the text - **Click and Drag** the right edge of a prompt box to extend/shorten it (2-10 sec) - **Click Empty Space** to add a prompt - **Right-Click** a prompt to delete it ### Constraints Constraints can be added after generating for the first time when there is an active motion in the viewer: - **Click** in the timeline tracks (Full-Body / 2D root etc) to add a constraint of that type using the pose at that frame - **Ctrl/Cmd + Click + Drag** to add an interval constraint, or expand a keyframe into an interval - **Click + Drag** an existing constraint to move it to a different frame - **Right-Click** on a constraint to delete it - To **edit** a constraint: - Move playback to the target frame - Click **Enter Editing Mode** in the Constraints tab of the Settings Panel. Note you must exit editing mode before generating again. ## Settings Panel ![Panel](../_static/demo/panel.png) The settings panel includes: - model selection - loading examples - model parameter selection for generation and post-processing - parameters for constraint editing - motion loading and saving - visualization options Important settings panels are individually explained on subsequent pages.
================================================ FILE: docs/source/key_concepts/constraints.md ================================================ # Constraints Constraints are time-localized signals that steer the generated motion toward specific spatial goals while keeping the rest of the motion free for the model to resolve. You can combine constraints with text prompts to control trajectory, pose, and end-effectors. Constraints are most easily defined in the [interactive demo](../interactive_demo/constraints.md) and can be saved to the [JSON format](../user_guide/constraints.md). ![Overview diagram of constraint types on a timeline](../_static/constraints.png) ## Why Constraints? Constraints allow you to: - pin the character to a target pose or keyframe - guide a path on the ground while preserving natural motion - fix hands or feet at specific times (for example, touch or contact events) ## Constraint Types Kimodo is trained to excel at specific types of constraints. **Sparse root 2D waypoint**: ground-plane 2D waypoints that guide the global translation of the character. This constrains the 2D components of the smoothed root representation generated by Kimodo. **Dense root 2D path**: dense 2D path constraints that guide a continuous trajectory. This constrains the 2D components of the smoothed root representation generated by Kimodo. **Sparse full-body keyframe**: full-body pose targets at specific frames. Within the Kimodo motion representation, this constrains the smoothed root position and all body joint positions at a specific frame. **Sparse end-effector constraint**: hand or foot targets, while leaving the rest of the body flexible. This constrains the smoothed root position along with the specified end-effectors. For hands, this will constrain the wrist position and rotation along with the hand end position. For feet, it constrains the heel position and rotation along with the toe position. Kimodo is trained to support arbitrary subsets of end-effectors. **Foot contacts**: toe/heel contact patterns. While the model is trained to support this, it is not currently implemented in the demo UI or Python API. ```{note} For SOMA models, constraints may be authored or displayed on the full `somaskel77` skeleton, but Kimodo converts them to the reduced `somaskel30` representation before passing them to the model. See the [skeleton](./skeleton.md) section for more details. ``` ## Coordinate Space All constraint values are in a **Y-up** coordinate system with units in **meters**. The model expects constraints relative to a canonical origin where the root starts at XZ = (0, 0) at frame 0. The initial heading can be set via the `first_heading_angle` generation parameter (defaults to 0, facing +Z). See the [constraints JSON format](../user_guide/constraints.md#coordinate-space-and-units) for full details on each field. ## Time and Scope In our CLI and demo, constraints can be defined at: - **Single frames**: keyframe-style constraints - **Intervals**: guidance across a range of frames However, as described above, the model is trained to excel mostly at sparse keyframes, with dense keyframes usually only seen for root paths. See [best practices](./limitations.md) for more details. ## Post-Processing Since it is very challenging for a neural network to strictly adhere to constraints, the demo and CLI support motion post-processing to ensure motion _exactly_ hits constraints.
This is done through a lightweight optimization that smoothly adjusts joints while minimizing changes in acceleration and velocity. ================================================ FILE: docs/source/key_concepts/limitations.md ================================================ # Best Practices On this page, we summarize the best approaches to maximize Kimodo's capabilities in terms of prompting and constraints, and also summarize known limitations and failure cases. For additional context, please see the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf). ## Text Prompting - For best results, begin each prompt with "A person..." (e.g., "A person walks forward" or "A person jumps and waves"). This phrasing helps clarify the subject and intent of the motion, and is more closely aligned with the style of prompts used in the training data. The subject can also be stylized to better describe the motion, such as "An old person..." or "A drunk person..." - Keep each prompt focused on one or at most two behaviors. For long sequences of actions, split them into multiple prompts and generate in sequence. - It's best to use a medium level of detail when describing a motion. Prompts like "A person walks." are too short and vague, while very long prompts describing the detailed motion of each body part will be too much for the model to handle. Most training data is a middle ground between these two. We recommend looking at the prompts in the [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed) to get an idea of prompt granularity. - Kimodo is trained on a specific set of human behaviors. The training data tends to cover locomotion, gestures, everyday activities, common object interactions, videogame combat, dancing, and various styles including tired, angry, happy, sad, scared, drunk, injured, stealthy, old, and childlike. Prompts for actions outside of these categories will likely give bad results. For example, "A baseball player walks up to the plate and swings a bat" is not good, because Kimodo has not trained on baseball data. - When using multiple prompts (e.g., in the timeline UI), make sure each prompt has enough information on its own. For example, if prompt 1 is "A person is walking while carrying an object", then prompt 2 could be "A person walking carrying an object comes to a stop". If prompt 2 were instead "Then the person stops", the model will not have enough context for what happened previously and may generate poor-quality motions. ## Constraints - Avoid using constraints that contradict the given text prompt or other types of constraints. If you are having trouble with a tradeoff between constraint and text accuracy, try adjusting the [classifier-free guidance weights](../user_guide/configuration.md). - Except for dense 2D root paths, Kimodo is mainly trained to handle sparse temporal constraints. Kimodo will perform best when the number of constraints per constraint type is less than 20 keyframes. - When foot contact accuracy and hitting constraints are a high priority, make sure to enable [post-processing](./constraints.md#post-processing). ## Limitations - **Motion length:** Maximum generated motion duration is 10 sec per prompt - **Number of constraints:** The number of constrained frames per constraint type should be less than 20 (excluding the root path constraint) - **Overly long or complex prompts** can blur motion intent, especially when many distinct actions are packed into a single prompt.
- **Conflicting constraints:** can lead to artifacts or to constraints being ignored - **Multi-prompt sequences**: When generating motions with a sequence of prompts, each motion is generated one at a time. The second motion is conditioned on the last frames of the first, so the transition between prompts actually happens at the start of the second motion. This means the second prompt must devote some of its duration to performing a smooth transition, which may reduce the time available to realize the new prompt content fully. - **Post-processing**: The model by itself can generate foot skating and will not exactly hit constraints. Post-processing helps with this, but currently does not work well for the G1 robot skeleton. ================================================ FILE: docs/source/key_concepts/model.md ================================================ # Model Overview At a glance: - Input: text prompt + optional constraints. - Output: full-body motion sequence - Core Idea: denoise motion features with a two-stage transformer at each step. Kimodo is an explicit motion diffusion model that generates 3D human motion by denoising a sequence of skeleton poses. The model operates on a carefully designed motion representation that enables precise control over generated motion while minimizing common artifacts, such as floating and foot skating. The motion representation features a smoothed root that emulates paths drawn in practical animation tools, along with global joint rotations and positions amenable to sparse keyframe constraints. For full details, see the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf). ![Kimodo model architecture](../_static/arch.png) ## Diffusion Process At each step of the denoising process, the model takes in an embedding of the text prompt, a set of kinematic constraints, and the current noisy motion. Constraints are specified using the same motion representation as the input motion, and are used to overwrite the corresponding values in the noisy motion. Additionally, a mask indicating which elements are constrained is concatenated to the input motion. The goal is to predict a clean version of the input motion. ## Two-Stage Transformer Denoiser Given these inputs, the two-stage transformer denoiser predicts a clean motion that aligns with the text and constraints. The two-stage denoiser decomposes root and body motion prediction: the root denoiser first predicts global root motion, which is transformed into a local representation as input to the body denoiser. The final output is the concatenation of the two stages. ## Training Dataset A key component to effectively train Kimodo is the [Bones Rigplay](https://bones.studio/ai-datasets/) dataset, a large studio mocap dataset containing over 700 hours of production-quality human motion with corresponding text descriptions. The data covers locomotion, gestures, everyday activities, common object interactions, videogame combat, dancing, and various styles including tired, angry, happy, sad, scared, drunk, injured, stealthy, old, and childlike. ================================================ FILE: docs/source/key_concepts/motion_representation.md ================================================ # Motion Representation Kimodo uses a motion representation that combines a smoothed root representation with global joint positions, rotations, and various auxiliary features.
For full details, please refer to the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf). The representation is implemented in `kimodo/motion_rep/reps/kimodo_motionrep.py` and allows easily going to and from this feature representation. ## Coordinate System All motion features use a right-handed coordinate system with: - **Y up** - **+Z forward** ## Smoothed Root Representation We use a smoothed root trajectory for the global root position to make path-following constraints more natural and controllable. Smoothing removes high-frequency pelvis jitter while preserving overall motion direction, so 2D waypoints or paths drawn by users remain clean and easy to match during generation, while the pelvis can still move naturally around the smoothed curve. ![Comparison of smoothed root rep](../_static/smoothed_root.png) ## Pose Feature At each frame, the pose feature vector is the concatenation of: - **Smooth root position** (`smooth_root_pos`, 3): Smoothed pelvis/root position. The x/z components track ground-plane motion and y stores height. - **Global root heading** (`global_root_heading`, 2): `[cos(theta), sin(theta)]` heading direction of the root. - **Local joint positions** (`local_joints_positions`, `J x 3`): Joint positions in a pelvis-relative space with the smoothed root x/z offset applied. - **Global joint rotations** (`global_rot_data`, `J x 6`): 6D rotation representation of each joint's global orientation. - **Joint velocities** (`velocities`, `J x 3`): Global joint velocities. - **Foot contacts** (`foot_contacts`, 4): Binary contact indicators for the left/right foot contact points. Concatenated, this gives `9 + 12J` values per frame for a skeleton with `J` joints (e.g., 369 for the reduced 30-joint SOMA skeleton used internally by the model). ================================================ FILE: docs/source/key_concepts/skeleton.md ================================================ # Skeletons Different versions of Kimodo support different skeletons (characters). A separate model is trained for each skeleton, with the currently available options being [SOMA](https://github.com/NVlabs/SOMA-X), [G1](https://github.com/unitreerobotics/unitree_mujoco/tree/main/unitree_robots/g1), and [SMPL-X](https://github.com/vchoutas/smplx). The skeletons discussed on this page are defined in `kimodo/skeleton/definitions.py`. ![Skeleton overview](../_static/skeletons/skeletons.png) ## SOMA (default) SOMA is the default skeleton used for Kimodo. It is based on the [SOMA body model](https://github.com/NVlabs/SOMA-X), which is also used in the [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed). Kimodo uses two closely related SOMA skeleton definitions: - **`somaskel30`**: the reduced 30-joint skeleton used internally by the model and by the core SOMA constraint formulation. It removes most finger and hand detail. - **`somaskel77`**: the full 77-joint SOMA skeleton used for public-facing visualization and SOMA motion exports. In practice, Kimodo predicts SOMA motions on `somaskel30` and converts them to `somaskel77` when returning or visualizing results in the demo. Older assets and examples may still be stored on `somaskel30`, and the tooling keeps backward compatibility with those files. Note that all training data for Kimodo is on a uniform skeleton proportion corresponding to one single set of identity parameters for the SOMA body model. !["SOMA skeletons"](../_static/skeletons/soma_skels.png) Outputs on the SOMA skeleton can be visualized in two ways. The first is by articulating a fixed SOMA rig and doing traditional skinning (corresponds to `kimodo/viz/soma_skin.py` in the codebase).
Alternatively, we can take generated joint rotations and feed them through the SOMA layer with the set of identity parameters that correspond to the body shape of our uniform skeleton. An example of this is in the codebase at `kimodo/viz/soma_layer_skin.py`, which uses the identity parameters defined from `kimodo/assets/skeletons/somaskel30/soma_base_fit_mhr_params.npz` (the same ones from BONES-SEED data). Due to peculiarities with data processing, using the SOMA rig and the SOMA layer gives very slightly different visualization results, with the SOMA rig better reflecting the data that Kimodo was trained on. ## Unitree G1 The G1 skeleton targets MuJoCo-compatible exports and robotics workflows. The version that Kimodo uses is a 34-joint skeleton, with extra joints added for the toes to ease learning. When generated motions are exported to the MuJoCo `qpos` CSV format, these joints are removed to be compatible with downstream applications. *G1 skeleton* ## SMPL-X This aligns with the SMPL-X model and supports AMASS-style exports. It uses 22 joints corresponding to only the body joints. This option is useful for compatibility with SMPL-X pipelines or downstream tools expecting AMASS parameters, but it is **not** the recommended Kimodo model to use since generated motions may display particularly severe retargeting artifacts. *SMPL-X skeleton* ================================================ FILE: docs/source/project_info.md ================================================ # Project Information ## Citation If you use this code in your research, please cite: ```bibtex @article{Kimodo2026, title={Kimodo: Scaling Controllable Human Motion Generation}, author={Rempe, Davis and Petrovich, Mathis and Yuan, Ye and Zhang, Haotian and Peng, Xue Bin and Jiang, Yifeng and Wang, Tingwu and Iqbal, Umar and Minor, David and de Ruyter, Michael and Li, Jiefeng and Tessler, Chen and Lim, Edy and Jeong, Eugene and Wu, Sam and Hassani, Ehsan and Huang, Michael and Yu, Jin-Bey and Chung, Chaeyeon and Song, Lina and Dionne, Olivier and Kautz, Jan and Yuen, Simon and Fidler, Sanja}, journal={arXiv:2603.15546}, year={2026} } ``` ## License The codebase is licensed under Apache-2.0. Please see the codebase for full license text. Note that model checkpoints are licensed separately as indicated on the HuggingFace download pages. ## Acknowledgments This project builds upon several excellent open-source projects: - [Viser](https://github.com/nerfstudio-project/viser) for 3D visualization - [LLM2Vec](https://github.com/McGill-NLP/llm2vec) for text encoding ## Contact For questions or issues, please open an issue on this repository or reach out directly to the authors.
================================================ FILE: docs/source/project_structure.md ================================================ # Project Structure ```text kimodo/ ├── kimodo/ # Main Python package │ ├── model/ # Model architecture and loading │ │ ├── kimodo_model.py # Kimodo diffusion model wrapper │ │ ├── twostage_denoiser.py # Two-stage denoising architecture │ │ ├── backbone.py # Transformer encoder backbone │ │ ├── diffusion.py # Diffusion process │ │ ├── cfg.py # Classifier-free guidance │ │ ├── common.py # Shared model utilities │ │ ├── load_model.py # Model loading and registry lookup │ │ ├── loading.py # Checkpoint loading utilities │ │ ├── registry.py # Model registry (skeleton, checkpoint URLs) │ │ ├── text_encoder_api.py # Text encoder API client │ │ ├── tmr.py # TMR compatibility │ │ └── llm2vec/ # LLM-based text encoder │ ├── motion_rep/ # Motion representation │ │ ├── reps/ # Skeleton-specific motion reps │ │ │ ├── base.py # Base motion rep types │ │ │ ├── kimodo_motionrep.py │ │ │ └── tmr_motionrep.py │ │ ├── conditioning.py # Conditioning (text, constraints) │ │ ├── feature_utils.py # Feature extraction │ │ ├── feet.py # Foot contact / smoothing │ │ ├── smooth_root.py # Smooth root representation │ │ └── stats.py # Normalization statistics │ ├── skeleton/ # Skeleton definitions and kinematics │ │ ├── definitions.py # Skeleton topology (joints, chains) │ │ ├── registry.py # Skeleton registry │ │ ├── base.py # Base skeleton types │ │ ├── kinematics.py # Forward kinematics │ │ ├── transforms.py # Rotation/transform utilities │ │ └── bvh.py # BVH I/O │ ├── viz/ # Visualization │ │ ├── scene.py # 3D scene setup │ │ ├── playback.py # Timeline / motion playback │ │ ├── viser_utils.py # Viser 3D helpers │ │ ├── gui.py # Demo GUI components │ │ ├── constraint_ui.py # Constraint editing UI │ │ ├── coords.py # Coordinate frames │ │ ├── soma_skin.py # SOMA character skinning │ │ ├── soma_layer_skin.py # SOMA layer-based skinning │ │ ├── smplx_skin.py # SMPL-X skinning │ │ └── g1_rig.py # G1 robot rig │ ├── demo/ # Interactive web demo │ │ ├── app.py # Demo entry (Gradio / Viser) │ │ ├── config.py # Demo configuration │ │ ├── state.py # Application state │ │ ├── ui.py # UI layout and callbacks │ │ ├── generation.py # Generation pipeline for demo │ │ ├── embedding_cache.py # Cached text embeddings │ │ ├── queue_manager.py # Request queue for demo │ │ └── __main__.py # Demo run as module │ ├── exports/ # Motion I/O and format conversion │ │ ├── motion_io.py # Kimodo motion dict helpers (load, save, resample) │ │ ├── motion_convert_lib.py # Library API for format conversion │ │ ├── motion_formats.py # Format detection and FPS resolution │ │ ├── bvh.py # SOMA BVH read/write │ │ ├── mujoco.py # G1 MuJoCo qpos conversion │ │ └── smplx.py # AMASS / SMPL-X conversion │ ├── metrics/ # Evaluation metrics │ │ ├── base.py # Metric base classes │ │ ├── foot_skate.py # Foot skate metrics │ │ ├── constraints.py # Constraint metrics │ │ └── tmr.py # TMR-based metrics │ ├── scripts/ # CLI and helper scripts │ │ ├── generate.py # CLI for motion synthesis (kimodo_gen) │ │ ├── motion_convert.py # CLI for format conversion (kimodo_convert) │ │ ├── run_text_encoder_server.py # Text encoder server (kimodo_textencoder) │ │ ├── gradio_theme.py # Gradio theme for demo │ │ ├── lock_requirements.py # Dependency locking │ │ └── mujoco_load.py # MuJoCo g1 csv loading │ ├── assets/ # Package data (shipped with package) │ │ ├── demo/ # Demo examples and config │ │ └── skeletons/ # Skeleton assets │ ├── 
constraints.py # Constraint definitions and handling │ ├── geometry.py # Geometric utilities │ ├── postprocess.py # Post-processing (e.g. MotionCorrection) │ ├── meta.py # Motion metadata │ ├── sanitize.py # Input sanitization │ ├── assets.py # Asset path resolution │ └── tools.py # General utilities ├── benchmark/ # Evaluation pipeline scripts │ ├── create_benchmark.py # Step 1: Build test suite from SEED + metadata │ ├── generate_eval.py # Step 2: Generate motions for test suite │ ├── embed_folder.py # Step 3: Embed motions and text with TMR │ ├── evaluate_folder.py # Step 4: Compute metrics for test cases │ └── parse_folder.py # Step 5: Aggregate and display results ├── MotionCorrection/ # Optional C++/Python post-processing │ ├── python/motion_correction/ # Python bindings │ └── src/cpp/ # C++ implementation ├── docs/ # Documentation (Sphinx) │ └── source/ # RST/MD sources ├── assets/ # Repo-level assets (banner, screenshots) ├── pyproject.toml # Package config and entry points ├── setup.py # Setuptools entry (if needed) ├── Dockerfile # Container image for demo ├── docker-compose.yaml # Docker Compose for demo + text encoder └── README.md
```

Entry points (from `pyproject.toml`):

- **`kimodo_gen`** — command-line motion synthesis (`kimodo.scripts.generate:main`)
- **`kimodo_demo`** — interactive web demo (`kimodo.demo:main`)
- **`kimodo_convert`** — motion format conversion (`kimodo.scripts.motion_convert:main`)
- **`kimodo_textencoder`** — text encoder server (`kimodo.scripts.run_text_encoder_server:main`)

================================================
FILE: docs/source/user_guide/cli.md
================================================
# Command-Line Interface

The primary CLI entrypoint is the `kimodo_gen` command. This corresponds to the script located at `kimodo/scripts/generate.py`, so you can equivalently use `python -m kimodo.scripts.generate`.

**Docker Usage**: If you set up Kimodo with Docker, you can instead run generation inside the Docker container, replacing `kimodo_gen XXX` with `docker compose run --rm demo kimodo_gen XXX`. If you will be running generation multiple times, it is better to start the `demo` container (e.g., in another terminal or in the background) and then run commands inside it with `docker compose exec demo kimodo_gen XXX`.

**Single Prompt Generation:**

```bash
kimodo_gen "A person walks forward." \
    --model Kimodo-SOMA-RP-v1 \
    --duration 5.0 \
    --output output
```

The `--model` option selects the Kimodo model checkpoint to generate with; if it is not provided, `Kimodo-SOMA-RP-v1` is used by default. The output motion will be saved using the stem name given by `--output` in the Kimodo [output format](../user_guide/output_formats.md). If generating with a G1 or SMPL-X model, you can also save to other output formats like the MuJoCo qpos CSV and AMASS NPZ formats. For **offline conversion** between Kimodo NPZ, AMASS NPZ, SOMA BVH, and G1 MuJoCo CSV after generating, use `kimodo_convert` (see [Motion format conversion](motion_convert.md)).

**Multi-Prompt Generation:**

Generating from a sequence of text prompts can be achieved by using multiple sentences separated by periods, with corresponding durations:

```bash
kimodo_gen "A person walks forward. A person is walking backwards." \
    --duration "5.0 4.0"
```

This command will use Kimodo to generate each prompt in sequence, with constraints added to the beginning of the second prompt to ensure continuity with the first generated motion. You can control how many frames are used to blend consecutive motions with the `--num_transition_frames` argument, as sketched below.
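To make the effect of `--num_transition_frames` concrete, here is a minimal, hypothetical sketch of blending two consecutively generated clips. It assumes a simple linear crossfade over `[T, D]` feature arrays; `blend_transition`, `prev_clip`, and `next_clip` are illustrative names, not the actual pipeline API, which may blend differently (e.g., on rotations):

```python
import numpy as np

def blend_transition(prev_clip: np.ndarray, next_clip: np.ndarray,
                     num_transition_frames: int = 5) -> np.ndarray:
    """Crossfade the tail of `prev_clip` into the head of `next_clip`."""
    k = num_transition_frames
    # Linear weights running from 1.0 (all previous clip) to 0.0 (all next clip).
    w = np.linspace(1.0, 0.0, k)[:, None]
    blended = w * prev_clip[-k:] + (1.0 - w) * next_clip[:k]
    # Previous clip minus its tail, the blended window, then the rest of the next clip.
    return np.concatenate([prev_clip[:-k], blended, next_clip[k:]], axis=0)
```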
**Single Prompt with Constraints:**

Generation can be constrained by providing a constraints JSON file (see the [Constraints Format Definition](constraints.md)).

```bash
kimodo_gen "A person walks forward and picks something up from the ground." \
    --model Kimodo-SOMA-RP-v1 \
    --duration 5.0 \
    --constraints kimodo/assets/demo/examples/kimodo-soma-rp/03_full_body_keyframes/constraints.json
```

Constraint files can be created and saved from the interactive demo, or manually defined following the [constraints format guide](constraints.md).

## Output Formats

For full details on output formats, see [this page](output_formats.md). To convert between these formats offline, see [Motion format conversion](motion_convert.md) (`kimodo_convert`).

CLI generation uses a single **output stem** (`--output`) for all formats (NPZ, AMASS NPZ, CSV, and BVH). It can write either **one file** or **a folder of files**, depending on the number of samples:

- **One sample** (`--num_samples 1`): writes a single file per format at the stem (e.g. `--output test` → `test.npz`, `test.csv`). No folder is created. For SMPLX, AMASS is written to `test_amass.npz`.
- **Multiple samples**: creates a folder with that stem and writes one file per sample with suffixes `_00`, `_01`, etc. (e.g. `--output test` → `test/test_00.npz`, ...).

Use the `--bvh` flag to also export BVH (SOMA only) to the same stem.

### Output Rest Pose

For SOMA-based Kimodo models, motions can be exported with respect to two different rest poses. The default rest pose, which is always used by the `NPZ` format, is a standard T-pose consistent with the canonical T-pose of the SOMA model. For `BVH` outputs, the default rest pose is a non-standard pose, but it is consistent with the BVH format of the [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed). To output a `BVH` file with the standard T-pose as the rest pose, you can use the `--bvh_standard_tpose` option. The standard T-pose used by Kimodo is available as a BVH file in the [repo assets](https://github.com/nv-tlabs/kimodo/tree/main/kimodo/assets/skeletons/somaskel77).

## Visualizing Generated Motions

Motions generated with the CLI can be visualized in the demo UI. To do this, under "Load/Save" > "Motion", type in the path of the generated output npz file, then click "Load Motion" to load it into the viewer. If you used constraints when generating, those can also be loaded in a similar way.

## Arguments

To see all available flags, run `kimodo_gen --help`. They are:

- `prompt`: Text description of the desired motion (required)
- `--model`: Model name to use (default: `Kimodo-SOMA-RP-v1`; options are the models in [this table](../getting_started/quick_start.md#overview-kimodo-models))
- `--duration`: Motion duration in seconds (default: `5.0`). For multiple prompts, pass space-separated durations in a string.
- `--diffusion_steps`: Number of denoising steps (default: `100`)
- `--num_samples`: Number of motion variations to generate (default: `1`)
- `--num_transition_frames`: Frames used to blend between prompts (default: `5`)
- `--constraints`: Path to a JSON file containing constraints
- `--output`: Output stem name (default: `output`). Used for all formats (NPZ, AMASS NPZ, CSV, BVH). With one sample, writes a single file per format (e.g. `test.npz`, `test.csv`). With multiple samples, creates a folder and writes `test_00.npz`, `test_01.npz`, … inside it. For SMPLX with one sample, AMASS is written to `stem_amass.npz` so it does not overwrite the main NPZ.
- `--save_example_dir`: If given, saves outputs to an "example" directory structure that can be loaded in the Kimodo demo.
- `--bvh`: Optional flag. When set, also export BVH (SOMA models only) using the same stem as `--output`.
- `--bvh_standard_tpose`: If exporting BVH, export with the rest pose being the standard T-pose rather than the rest pose consistent with the BONES-SEED dataset.
- `--seed`: Seed for reproducible results
- `--no-postprocess`: Disable post-processing (includes foot skate cleanup and constraint optimization)
- `--input_folder`: Folder containing `meta.json` and optional `constraints.json`. If set, generation settings are loaded from `meta.json`. These are found in demo example folders.
- `--cfg_type`: Classifier-free guidance mode: `nocfg`, `regular`, or `separated` (the custom mode with independent text and constraint scales). See {ref}`Classifier-free guidance (details) <classifier-free-guidance-cfg>` below.
- `--cfg_weight`: One float for `regular` CFG, or two floats `[text_weight, constraint_weight]` for `separated` CFG. If you pass only weights (no `--cfg_type`), one value implies `regular` and two imply `separated`. Not used with `nocfg`.

:::{dropdown} Classifier-free guidance (CFG)
:name: classifier-free-guidance-cfg

The CLI mirrors the Python API in [Generation parameters](configuration.md): Kimodo supports standard CFG (`regular`) and a **separated** variant with two scales—text vs. constraints—which is the usual setting in this project.

**Rules:**

- `nocfg`: no weights; do not pass `--cfg_weight`.
- `regular`: pass exactly one value after `--cfg_weight`.
- `separated`: pass exactly two values after `--cfg_weight`.

If you pass **`--cfg_type` or `--cfg_weight` on the command line**, those values override any `cfg` block in `meta.json` when using `--input_folder`. If you omit both flags, `meta.json` may still supply CFG via `cfg.enabled`, `cfg.text_weight`, and `cfg.constraint_weight` (same shape as the interactive demo examples). If there is no CLI CFG and no `cfg` in meta, the model uses its built-in defaults.

Examples:

```bash
# No classifier-free guidance
kimodo_gen "A person walks." --cfg_type nocfg

# Standard CFG (single scale)
kimodo_gen "A person walks." --cfg_type regular --cfg_weight 2.5

# Separated CFG (text scale, then constraint scale)
kimodo_gen "A person walks." --cfg_type separated --cfg_weight 2.0 1.5

# Infer mode from arity: one float -> regular; two floats -> separated
kimodo_gen "A person walks." --cfg_weight 2.0 2.0
```

:::

## Python API

The `kimodo/scripts/generate.py` script is a good place to start familiarizing yourself with the Python API of Kimodo if you'd like to use it directly. The full model API is detailed in the [API documentation](../api_reference/index.rst). If you want to use Kimodo in another project, you can interact with it like this:

```python
from kimodo import load_model

model = load_model("kimodo-soma-rp", device="cuda")
output = model(
    prompt="A person jumps",
    num_frames=150,
    num_denoising_steps=100,
)
```

================================================
FILE: docs/source/user_guide/configuration.md
================================================
# Generation Parameters

In the demo UI, the command-line tool (`kimodo_gen` / `python -m kimodo.scripts.generate`), and the low-level Python API, Kimodo allows some advanced configuration for motion generation.

## Classifier-Free Guidance

Control the strength of text and constraint guidance:

```python
output = model(
    prompt="A person jumps",
    num_frames=150,
    cfg_weight=[2.0, 2.0],  # [text_weight, constraint_weight]
    cfg_type="separated",   # Options: "nocfg", "regular", "separated"
    num_denoising_steps=100,
)
```

These settings are helpful when there is a tradeoff between following the prompt and hitting constraints. The CFG options are:

- `cfg_type="nocfg"`: No guidance (faster, less controllable)
- `cfg_type="regular"`: "Standard" classifier-free guidance
  - Equation: `out_uncond + w * (out_text_and_constraint - out_uncond)`
- `cfg_type="separated"`: Separate weights for text and constraints
  - Equation: `out_uncond + w_text * (out_text - out_uncond) + w_constraint * (out_constraint - out_uncond)`
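As a concrete reading of these equations, here is a schematic sketch of how the two CFG modes combine denoiser outputs. The `out_*` arrays stand for model outputs under different conditioning; the names and the `nocfg` behavior (simply returning the fully conditioned output) are illustrative assumptions, not the actual internal API:

```python
import numpy as np

def apply_cfg(out_uncond, out_text, out_constraint, out_text_and_constraint,
              cfg_type="separated", cfg_weight=(2.0, 2.0)):
    """Combine denoiser outputs following the CFG equations above."""
    if cfg_type == "nocfg":
        # No guidance: use the conditioned output as-is (assumed behavior).
        return out_text_and_constraint
    if cfg_type == "regular":
        w = cfg_weight if np.isscalar(cfg_weight) else cfg_weight[0]
        return out_uncond + w * (out_text_and_constraint - out_uncond)
    if cfg_type == "separated":
        w_text, w_constraint = cfg_weight
        return (out_uncond
                + w_text * (out_text - out_uncond)
                + w_constraint * (out_constraint - out_uncond))
    raise ValueError(f"Unknown cfg_type: {cfg_type!r}")
```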
### CLI

The same options are available from the command line as `--cfg_type` and `--cfg_weight`. See the {ref}`CLI user guide (CFG) <classifier-free-guidance-cfg>` for examples, validation rules, and how `meta.json` interacts with explicit flags when using `--input_folder`.

## Denoising Steps

The number of denoising steps used in DDIM sampling controls the speed vs. quality trade-off:

- Fewer steps (50-100): Faster inference, slightly lower quality
- More steps (100-200): Higher quality, slower inference

================================================
FILE: docs/source/user_guide/constraints.md
================================================
# Constraints JSON Format

The `--constraints` flag in the CLI expects a JSON file containing a list of constraint objects. It is easiest to look at the examples provided with the demo to see how these are formatted; they can be found for various model types in `kimodo/assets/demo/examples`.

> Tip: the easiest way to get a valid constraints file is to create constraints in the interactive demo and click on `Save Constraints`.

## High-Level Structure

- The file is a JSON array: `[{...}, {...}, ...]`
- Each element is an object with at least:
  - `type` (string): one of `root2d`, `fullbody`, `left-hand`, `right-hand`, `left-foot`, `right-foot`, `end-effector`
  - `frame_indices` (array of integers): 0-based frame indices within the generated clip.

```{note}
For SOMA models, constraints may be authored or displayed on the full `somaskel77` skeleton, but Kimodo converts them to the reduced `somaskel30` representation before passing them to the model. See the [skeleton](../key_concepts/skeleton.md) section for more details.
```

## Coordinate Space and Units

All spatial values in constraints use the same coordinate system as Kimodo's internal motion representation:

- **Axes**: **Y-up**, with locomotion on the **XZ ground plane**. The Y axis points up; X and Z span the horizontal ground plane.
- **Units**: **Meters**. Joint positions, root translations, and 2D root coordinates are all in meters.

### Canonicalization

During training, every motion is *canonicalized* so that the (smoothed) root starts at the XZ origin `(0, 0)` at frame 0. The initial body heading (facing direction) is randomly rotated and passed to the model as an explicit input (`first_heading_angle`), so the model is robust to arbitrary initial orientations. At inference, constraints should be authored **relative to this canonical origin**:

- `smooth_root_2d` values at frame 0 should be at `(0, 0)`, with subsequent frames expressing displacement from there.
- `root_positions` XZ components follow the same convention; Y is the **absolute hip height above the ground** (typically ~0.9 m for a standing pose, lower for crouching/sitting).
- `first_heading_angle` (a generation parameter, not part of the constraints JSON) defaults to `0.0` radians (facing +Z) but can be set to any value to change the initial facing direction.

### Field-specific notes

| Field | Space | Notes |
|-------|-------|-------|
| `smooth_root_2d` | `[x, z]` ground plane (meters) | Relative to the canonical origin. |
| `root_positions` | `[x, y, z]` (meters) | Y is absolute hip height above ground. XZ relative to canonical origin. |
| `global_root_heading` | `[cos(θ), sin(θ)]` | **Not** a raw radian value — must be a 2-element cosine/sine pair per frame (i.e. the heading direction vector). |
| `local_joints_rot` | axis-angle (radians) | Local joint rotations in the skeleton's rest-pose frame. |

### Constraints not at frame 0

Adding a constraint at frame 0 is **not** required. If the first constrained frame is later in the sequence (e.g. frame 45), Kimodo generates the initial frames freely from its learned distribution, starting near XZ = (0, 0) with the heading set by `first_heading_angle`. The constraint just needs to be reachable from that starting configuration given the text prompt and motion duration.

## Constraint Types

Depending on `type`, additional fields are required or optional. All numeric arrays are plain nested JSON lists. In the following definitions, `T` is the number of constrained frames (i.e., the number of `frame_indices`) and `J` is the number of skeleton joints.

### `root2d`

This captures 2D root waypoints and 2D root paths. It requires:

- `smooth_root_2d` (array shaped `[T, 2]`): Smoothed root positions `[x, z]` on the ground plane at the given `frame_indices`.

and optionally:

- `global_root_heading` (array shaped `[T, 2]`): Global root heading direction `[cos, sin]` at the given `frame_indices`.

### `fullbody`

This captures full-body keyframe constraints on joint positions. It includes:

- `local_joints_rot` (array shaped `[T, J, 3]`): Per-frame, per-joint **axis-angle** local rotations (radians). Constraint joint positions will be derived from these.
- `root_positions` (array shaped `[T, 3]`): Root (hips) translation `[x, y, z]`.
- `smooth_root_2d` (optional; array shaped `[T, 2]`): Smoothed root positions `[x, z]`. If omitted, it is taken as the `[x, z]` components of `root_positions`.

Note that `local_joints_rot` will not be explicitly constrained; the constraint is on the joint positions that result from FK with the given joint rotations.

### `left-hand` / `right-hand` / `left-foot` / `right-foot`

These capture end-effector constraints on the hand/foot joint positions and global rotations. They use the same fields as `fullbody`; however, under the hood they only affect the corresponding end-effectors and hips. Each of these types is a shorthand for `end-effector` with pre-set joint names.

### `end-effector`

A general end-effector constraint that requires an additional field:

- `joint_names` (array of strings): Which end-effectors to constrain (e.g. `["left_hand"]`, `["right_foot", "left_foot"]`). Available names depend on the skeleton; see the skeleton's `expand_joint_names()` for the full mapping.

Otherwise it uses the same fields as `fullbody` (`local_joints_rot`, `root_positions`, optional `smooth_root_2d`).

## Examples

### Root 2D waypoints

```json
[
  {
    "type": "root2d",
    "frame_indices": [0, 30, 60],
    "smooth_root_2d": [[0.0, 0.0], [0.5, 0.0], [1.0, 0.1]]
  }
]
```

### Full-body keyframe

```json
[
  {
    "type": "fullbody",
    "frame_indices": [60],
    "root_positions": [[0.0, 0.96, 1.5]],
    "local_joints_rot": [[[0.0, 0.0, 0.0], "... one [3] per joint ..."]]
  }
]
```

Here `root_positions` places the hips at x=0, y=0.96 m (standing height), z=1.5 m forward from the origin. `local_joints_rot` is a `[T, J, 3]` array of axis-angle rotations for every joint in the skeleton.
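Constraint files can also be authored programmatically. Below is a small sketch that writes the `root2d` example above to disk, including the optional heading constraint encoded as the `[cos, sin]` pairs described in the field notes (the heading angles here are arbitrary illustration values):

```python
import json
import math

frames = [0, 30, 60]
# Desired heading angle (radians) at each constrained frame.
headings = [0.0, 0.0, math.pi / 6]

constraints = [
    {
        "type": "root2d",
        "frame_indices": frames,
        # Waypoints on the XZ ground plane (meters), relative to the canonical origin.
        "smooth_root_2d": [[0.0, 0.0], [0.5, 0.0], [1.0, 0.1]],
        # One [cos, sin] heading direction pair per constrained frame.
        "global_root_heading": [[math.cos(a), math.sin(a)] for a in headings],
    }
]

with open("constraints.json", "w") as f:
    json.dump(constraints, f, indent=2)
```

The resulting file can be passed directly to `kimodo_gen ... --constraints constraints.json`.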
================================================
FILE: docs/source/user_guide/motion_convert.md
================================================
# Motion Format Conversion

The `kimodo_convert` command converts between the formats described in [Output formats](output_formats.md): **Kimodo NPZ**, **AMASS NPZ** (SMPL-X), **SOMA BVH**, and **G1 MuJoCo CSV**.

## Frame rate (30 Hz Kimodo NPZ)

Any conversion **to Kimodo NPZ** (from AMASS, SOMA BVH, or G1 CSV) **writes motion at 30 Hz**, matching Kimodo’s common generation rate. If the detected source rate differs, the tool **resamples** along time, then derived channels (contacts, smooth root, heading) are recomputed via forward kinematics. If resampling is required, a **warning** is emitted with the assumed source rate, input/output frame counts, and a reminder that `--source-fps` sets the **source** rate if autodetection is wrong. When the source is already ~30 Hz with the same frame count, no warning is shown (motion is only re-derived via FK for consistency).
### Resampling strategy details

The resampler picks one of two strategies based on the ratio `source_fps / target_fps`:

- **Integer-ratio fast path** — When the ratio is close to an integer ≥ 2 (within a tolerance of 0.05), the resampler simply takes every *step*-th frame (`frames[::step]`). For example, 120 Hz → 30 Hz has ratio 4, so every 4th frame is kept. This is exact and very fast.
- **Interpolation fallback** — Otherwise, the output timeline is linearly spaced over the input range. Root positions are linearly interpolated, and local joint rotations are interpolated via quaternion slerp. This handles arbitrary rate conversions (e.g. 50 Hz → 30 Hz).

In both cases, `complete_motion_dict` is re-run at the target rate so that all derived channels (velocities, foot contacts, heading, smooth root) stay consistent with the new frame spacing.
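For illustration, here is a minimal re-implementation of this two-strategy resampler, assuming `[T, 3]` root positions and `[T, J, 4]` local quaternions in `xyzw` order (the function name and array layout are assumptions; the real converter also re-runs `complete_motion_dict` afterwards):

```python
import numpy as np
from scipy.spatial.transform import Rotation, Slerp

def resample_motion(root_pos, local_quats, source_fps, target_fps=30.0, tol=0.05):
    """Resample root positions and per-joint local rotations to `target_fps`."""
    ratio = source_fps / target_fps
    step = round(ratio)
    # Integer-ratio fast path: keep every `step`-th frame (e.g. 120 Hz -> 30 Hz).
    if step >= 2 and abs(ratio - step) < tol:
        return root_pos[::step], local_quats[::step]

    # Interpolation fallback: linearly spaced output timeline over the input range.
    num_in = root_pos.shape[0]
    t_in = np.arange(num_in) / source_fps
    num_out = int(round((num_in - 1) * target_fps / source_fps)) + 1
    t_out = np.linspace(t_in[0], t_in[-1], num_out)

    # Root positions: linear interpolation per axis.
    new_pos = np.stack(
        [np.interp(t_out, t_in, root_pos[:, d]) for d in range(3)], axis=-1
    )
    # Local rotations: quaternion slerp per joint.
    num_joints = local_quats.shape[1]
    new_quats = np.empty((num_out, num_joints, 4))
    for j in range(num_joints):
        slerp = Slerp(t_in, Rotation.from_quat(local_quats[:, j]))
        new_quats[:, j] = slerp(t_out).as_quat()
    return new_pos, new_quats
```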
## Usage ```bash kimodo_convert INPUT OUTPUT [options] ``` Formats are inferred from file extensions and (for `.npz`) from file contents. You can override with `--from` and `--to`. ### Supported conversions | From | To | Notes | |------|-----|--------| | AMASS `.npz` | Kimodo `.npz` | SMPL-X, 22 joints. Uses `--z-up` by default (same as Kimodo’s AMASS export). | | Kimodo `.npz` | AMASS `.npz` | Requires `local_rot_mats` with 22 joints (SMPL-X). | | SOMA `.bvh` | Kimodo `.npz` | Expects a **Kimodo-exported** SOMA BVH (same hierarchy as `save_motion_bvh`). If the BVH uses the standard T-pose as rest pose, pass in `--bvh_standard_tpose`. | | Kimodo `.npz` | SOMA `.bvh` | Accepts 77 joints (SOMA full) or 30 joints (somaskel30, auto-expanded to 77 with relaxed-hand rest poses). If you want the output BVH to use the standard T-pose as rest pose, pass in `--bvh_standard_tpose`. | | G1 `.csv` | Kimodo `.npz` | Rows of shape `(36,)` = root xyz + root quat + 29 joint angles (see [output_formats](output_formats.md#csv-format-for-kimodo-g1)). | | Kimodo `.npz` | G1 `.csv` | Requires 34 joints (G1). | ### Common options - **`--source-fps`**: Source motion frame rate in Hz (used before resampling to 30 Hz for Kimodo NPZ). If omitted, the tool auto-detects from `mocap_frame_rate` (AMASS), `Frame Time` (BVH), or defaults to **30** Hz. The legacy `--fps` alias is still accepted for backward compatibility. - **`--no-z-up`**: For AMASS, disable the Y-up ↔ Z-up transform (treat data as already in Kimodo Y-up, +Z forward). - **`--mujoco-rest-zero`**: For G1 CSV, match the `mujoco_rest_zero` flag used when the CSV was written (see `MujocoQposConverter.dict_to_qpos`). - **`--bvh_standard_tpose`**: If input or output is BVH: the BVH file uses the standard T-pose as its rest pose instead of the BONES-SEED rest pose. ### Examples ```bash # AMASS → Kimodo NPZ kimodo_convert motion_amass.npz motion_kimodo.npz # Kimodo NPZ → AMASS kimodo_convert motion_kimodo.npz motion_out_amass.npz # Kimodo SOMA NPZ → BVH kimodo_convert motion_kimodo.npz motion.bvh # BVH → Kimodo NPZ kimodo_convert motion.bvh motion_kimodo.npz # G1 CSV → Kimodo NPZ kimodo_convert motion.csv motion_kimodo.npz # Kimodo G1 NPZ → CSV kimodo_convert motion_kimodo.npz motion.csv ``` When both input and output are `.npz`, the tool assumes **AMASS → Kimodo** if the input is AMASS, and **Kimodo → AMASS** if the input is already a Kimodo NPZ. Use `--from` / `--to` if you need to disambiguate. ## Limitations - **BVH import** is intended for BVHs produced by Kimodo (`Root` wrapper + SOMA77 joint names) and is also compatible with the BONES-SEED dataset, which uses the same skeleton hierarchy. Arbitrary BVH files with different joint names or hierarchies may not work. - **G1 CSV** encodes only the degrees of freedom exposed in MuJoCo; the inverse path reconstructs local rotations from those angles (same convention as `to_qpos`). ================================================ FILE: docs/source/user_guide/output_formats.md ================================================ # Output Formats ## Converting Between Formats To convert between the formats described below, see [Motion format conversion](motion_convert.md) (`kimodo_convert`). ## Kimodo NPZ Format Generated motions are stored as NPZ files (one file per sample, e.g. 
`motion_00.npz`) containing:

- `posed_joints`: Global joint positions `[T, J, 3]`
- `global_rot_mats`: Global joint rotation matrices `[T, J, 3, 3]`
- `local_rot_mats`: Local (parent-relative) joint rotation matrices `[T, J, 3, 3]`
- `foot_contacts`: Foot contact labels [left heel, left toe, right heel, right toe] `[T, 4]`
- `smooth_root_pos`: Smoothed root representation output by the model `[T, 3]`
- `root_positions`: The (non-smoothed) trajectory of the actual root joint (e.g., pelvis) `[T, 3]`
- `global_root_heading`: The heading direction output from the model `[T, 2]`

Where:

- `T`: number of frames
- `J`: number of joints in the exported skeleton representation (`77` for SOMA NPZ exports, `34` for G1, `22` for SMPL-X)

If multiple samples are generated, files are saved with suffixes like `_00`, `_01`, etc.

For SOMA models, the exported NPZ uses the full **`somaskel77`** skeleton even though the model itself operates internally on the reduced **`somaskel30`** skeleton. This means the saved `posed_joints`, `global_rot_mats`, and `local_rot_mats` arrays are written in the 77-joint SOMA layout. Older 30-joint SOMA NPZ files may still exist and remain loadable for backward compatibility.

Also for SOMA models, the output motion is saved such that the rest pose (i.e. zero pose) is the standard T-pose that Kimodo uses internally. This differs from the default behavior of BVH export (see below), which uses a rest pose consistent with the BONES-SEED dataset format. The standard T-pose as a BVH file is also available [in the assets of the repo](https://github.com/nv-tlabs/kimodo/tree/main/kimodo/assets/skeletons/somaskel77).

## BVH Format for Kimodo-SOMA

When using a SOMA model and passing the `--bvh` flag to CLI generation, Kimodo also writes a BVH file alongside the NPZ output.

- BVH export is supported for **SOMA models only**
- the exported hierarchy uses the full **`somaskel77`** skeleton
- if the motion is still in internal `somaskel30` form, Kimodo converts it to `somaskel77` before writing the BVH
- the file stores root translation plus per-joint local rotations for the clip at the generated frame rate
- by default, the rest pose (i.e., zero pose) of the saved BVH file is consistent with the BONES-SEED dataset format. If you prefer a standard T-pose as the rest pose, pass in `--bvh_standard_tpose` when generating.

The exporter writes a standard plain-text BVH file and scales joint offsets and root motion from meters to centimeters (same format as the SEED dataset release). If multiple samples are generated, files are saved with suffixes like `_00`, `_01`, etc.

## CSV Format for Kimodo-G1

When using `Kimodo-G1` models and providing `--output` to CLI generation, the exporter writes MuJoCo `qpos` data to a CSV file. Each row corresponds to a pose in the motion and contains 36 values:

- Root translation `[x, y, z]`
- Root rotation quaternion `[w, x, y, z]`
- 29 joint 1-DoF values (in G1 joint order)

The CSV uses the MuJoCo coordinate system (z-up, +x forward). If multiple samples are generated, files are saved with suffixes like `_00`, `_01`, etc.

## AMASS NPZ Format for Kimodo-SMPLX

When using the `Kimodo-SMPLX-RP` model and `--output` is specified for CLI generation, the exporter writes an AMASS-style SMPL-X `.npz` file. Keys include:

- `trans`: Root translation `[T, 3]`
- `root_orient`: Root orientation axis-angle `[T, 3]`
- `pose_body`: Body pose axis-angle `[T, 63]` (21 joints x 3)
- `pose_hand`: Hand pose axis-angle `[T, 90]` (15 joints x 2 hands x 3)
- `pose_jaw`: Jaw pose axis-angle `[T, 3]`
- `pose_eye`: Eye pose axis-angle `[T, 6]`
- `betas`: Shape coefficients
- `num_betas`: Number of shape coefficients
- `gender`: `neutral`
- `surface_model_type`: `smplx`
- `mocap_frame_rate`: Frame rate (fps)
- `mocap_time_length`: Motion duration in seconds

The exporter converts from the Kimodo coordinate system (y-up, +z forward) to AMASS coordinates (z-up, +y forward). If multiple samples are generated, files are saved with suffixes like `_00`, `_01`, etc.
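As a quick sanity check on any generated Kimodo NPZ, the keys documented above can be inspected directly with NumPy. A minimal sketch, assuming a SOMA export at Kimodo's 30 Hz generation rate:

```python
import numpy as np

# Load a generated Kimodo NPZ and print the shape of each documented array.
motion = np.load("motion_00.npz")
for key in ("posed_joints", "global_rot_mats", "local_rot_mats",
            "foot_contacts", "smooth_root_pos", "root_positions",
            "global_root_heading"):
    print(key, motion[key].shape)

num_frames, num_joints, _ = motion["posed_joints"].shape  # J == 77 for SOMA exports
duration_s = num_frames / 30.0  # Kimodo generates at 30 Hz
print(f"{num_frames} frames ({duration_s:.2f} s), {num_joints} joints")
```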
================================================
FILE: docs/source/user_guide/seed_dataset.md
================================================
# Loading BONES-SEED BVH data

The [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed) is a publicly available optical motion-capture dataset distributed as BVH files with the [SOMA 77-joint skeleton](../key_concepts/skeleton.md). This page walks through the steps to parse a SEED BVH file and convert it into Kimodo's internal motion representation. This is similar to the pipeline used by the benchmark to extract ground-truth motions from SEED data (see the [benchmark pipeline](../benchmark/pipeline.md)).

## Step-by-Step Conversion

### 1. Parse the BVH file

`parse_bvh_motion` reads a BVH file and returns local joint rotation matrices, root translation (in meters), and the source frame rate.

```python
from kimodo.skeleton.bvh import parse_bvh_motion

local_rot_mats, root_trans, bvh_fps = parse_bvh_motion(bvh_path)
```

### 2. Subsample to 30 FPS

Kimodo operates at 30 Hz. If the source BVH has a different frame rate (120 FPS for BONES-SEED), subsample by striding:

```python
fps = 30
step = round(bvh_fps / fps)
root_trans = root_trans[::step]
local_rot_mats = local_rot_mats[::step]
```

### 3. Convert to the standard T-pose

The SEED BVH rest pose differs from Kimodo's canonical T-pose. The `to_standard_tpose` function remaps the local rotations accordingly and returns both local and global rotation matrices:

```python
from kimodo.skeleton import SOMASkeleton77

skeleton = SOMASkeleton77()
local_rot_mats, global_rot_mats = skeleton.to_standard_tpose(local_rot_mats)
```

### 4. Compute Kimodo motion features

Build the motion feature tensor used by the model. The feature layout is described in [Motion representation](../key_concepts/motion_representation.md).

```python
from kimodo.motion_rep import KimodoMotionRep

motion_rep = KimodoMotionRep(skeleton, fps)
feats = motion_rep(local_rot_mats, root_trans, to_normalize=False)
```

### 5. Canonicalize (optional) and recover the motion dictionary

Canonicalize so that the motion starts at the origin facing +Z, then invert the features back into a full motion dictionary:

```python
can_feats = motion_rep.canonicalize(feats)
motion_dict = motion_rep.inverse(can_feats, is_normalized=False)
```

`motion_dict` is a dictionary with keys such as `local_rot_mats`, `global_rot_mats`, `posed_joints`, `root_positions`, `smooth_root_pos`, `foot_contacts`, etc. See [Output formats](output_formats.md) for details on the Kimodo NPZ layout.

## Full script

```python
from kimodo.motion_rep import KimodoMotionRep
from kimodo.skeleton import SOMASkeleton77
from kimodo.skeleton.bvh import parse_bvh_motion

# 1. Parse BVH
local_rot_mats, root_trans, bvh_fps = parse_bvh_motion(bvh_path)

# 2.
Subsample to 30 fps fps = 30 step = round(bvh_fps / fps) root_trans = root_trans[::step] local_rot_mats = local_rot_mats[::step] # 3. Convert to standard T-pose skeleton = SOMASkeleton77() local_rot_mats, global_rot_mats = skeleton.to_standard_tpose(local_rot_mats) # 4. Compute motion features motion_rep = KimodoMotionRep(skeleton, fps) feats = motion_rep(local_rot_mats, root_trans, to_normalize=False) # 5. Canonicalize and get the full motion dictionary can_feats = motion_rep.canonicalize(feats) motion_dict = motion_rep.inverse(can_feats, is_normalized=False) ``` ================================================ FILE: kimodo/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Kimodo: text-driven and constrained motion generation model.""" from .model.load_model import AVAILABLE_MODELS, DEFAULT_MODEL, load_model __all__ = [ "AVAILABLE_MODELS", "DEFAULT_MODEL", "load_model", ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/01_single_text_prompt/meta.json ================================================ { "text": "A person walking forward quickly stumbles but maintains their balance", "duration": 5.0, "num_samples": 1, "seed": 43, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/02_multi_text_ee_constraint/constraints.json ================================================ [ { "type": "left-hand", "frame_indices": [ 40, 155 ], "local_joints_rot": [ [ [ -0.20672118663787842, 0.0004979433142580092, 0.010066316463053226 ], [ 0.0789145976305008, 0.0008333905134350061, -5.267082087812014e-05 ], [ -0.1686924546957016, -0.0027884345036000013, 0.0520743690431118 ], [ 0.000989485066384077, 0.1385614573955536, 0.0005803265958093107 ], [ 1.0274103879928589, -0.0004089517460670322, 0.0007986496202647686 ], [ -0.39034226536750793, -0.001306047779507935, -4.922552761854604e-05 ], [ 0.0023066187277436256, -0.0007853881106711924, -0.0062883589416742325 ], [ 4.49517356173601e-05, 0.0033443598076701164, -0.0014551420463249087 ], [ 0.07268467545509338, -0.0011258760932832956, -3.953919076593593e-05 ], [ -0.1719113141298294, 0.018712127581238747, 0.06082615628838539 ], [ 0.0011432868195697665, 0.02744375728070736, 0.0025501118507236242 ], [ 0.41685307025909424, -0.002692570211365819, -0.0006283970433287323 ], [ -0.1283608227968216, 0.0030534265097230673, 0.00016949126438703388 ], [ -0.005590266548097134, 0.0014076301595196128, -0.038615260273218155 ], [ -0.00013014793512411416, 0.001360177993774414, 6.41088408883661e-05 ], [ 0.00010043015936389565, -0.01370090153068304, -0.00014910128084011376 ], [ 0.00023336269077844918, 0.0025421029422432184, 0.04833226650953293 ], [ 0.056574925780296326, 0.0006874562823213637, 0.0004548647266346961 ], [ -0.37481847405433655, -0.054357241839170456, 0.2803272306919098 ], [ 0.0013725318713113666, 0.009074348025023937, -0.0021504403557628393 ], [ -0.0012184121878817677, -0.4267229437828064, 0.011203057132661343 ], [ 1.255251407623291, 0.0009449978824704885, 0.0010158077348023653 ], [ -0.003570390399545431, -0.003947308287024498, -0.5030224323272705 ], [ 0.1881941556930542, -0.000495748536195606, 0.0016725400928407907 ], [ -0.002223622752353549, 0.11821465194225311, 0.007546884939074516 ], [ -0.00137770373839885, -0.0031452146358788013, 
-0.0015015294775366783 ], [ -0.3751647472381592, 0.05314668267965317, -0.28086331486701965 ], [ -0.007756246719509363, -0.016310883685946465, -0.02847120724618435 ], [ -0.0002517815155442804, 0.427451491355896, 3.640262002591044e-05 ], [ 1.2455408573150635, -0.0014789876295253634, 0.0008519256953150034 ], [ 0.004311776254326105, 0.009671058505773544, 0.5968337655067444 ], [ 0.1335560381412506, 0.0011528844479471445, -0.0008361327927559614 ], [ 0.001167859067209065, -0.1551152616739273, 0.00019725598394870758 ], [ -0.0014258474111557007, 0.0034801543224602938, 0.0009809854673221707 ] ], [ [ -0.047659896314144135, -0.11130385845899582, -0.0020901868119835854 ], [ -1.5705475807189941, -0.0014125468442216516, -0.0008221857133321464 ], [ -0.16147980093955994, 0.014729475602507591, 0.4458121657371521 ], [ -0.00045561062870547175, -0.1160486489534378, -0.006125911604613066 ], [ 2.811251401901245, 0.0016747766640037298, -0.005349006038159132 ], [ -0.8591147065162659, 0.0037903853226453066, 0.00048354381578974426 ], [ 0.006445891689509153, -0.0036706889513880014, -0.03472399711608887 ], [ -0.001481462037190795, 0.0015367366140708327, -0.0015593112912029028 ], [ -1.5751848220825195, 0.001112997648306191, 0.0009848373010754585 ], [ -0.16862420737743378, -0.016877643764019012, -0.26229384541511536 ], [ -9.055795817403123e-05, 0.09453120082616806, -0.0134742371737957 ], [ 2.811314344406128, 0.003919574897736311, 0.005575981922447681 ], [ -0.8299098014831543, -0.003791244002059102, 0.0012802339624613523 ], [ 0.005852710455656052, 0.005849692039191723, 0.1632416546344757 ], [ -0.0015579514438286424, 9.288851288147271e-05, 0.001196552417241037 ], [ 0.00043879495933651924, 0.04429133981466293, 0.0002551022043917328 ], [ -0.0019886596128344536, 0.008745947852730751, -0.00962099153548479 ], [ 0.5197923183441162, -0.0010678194230422378, 0.0002590256044641137 ], [ -0.9051622152328491, -0.12138096988201141, 0.25749173760414124 ], [ 0.010689850896596909, -0.01072163600474596, 0.20382197201251984 ], [ -0.0009684870601631701, -0.5894762873649597, 0.0032688004430383444 ], [ 1.30536949634552, -0.002206705743446946, -0.0020471925381571054 ], [ 0.0067055909894406796, -0.015674468129873276, -0.9086763262748718 ], [ -0.26612186431884766, -0.00016191616305150092, 0.002851327648386359 ], [ 0.003539646975696087, 0.20451955497264862, -0.02575569413602352 ], [ 0.003367731347680092, 0.0018452388467267156, -0.00026573429931886494 ], [ -0.9464634656906128, 0.12737642228603363, -0.2577688992023468 ], [ 0.00046661958913318813, -0.008693858049809933, -0.19606870412826538 ], [ -0.0058177076280117035, 0.6349377036094666, -0.0003108184027951211 ], [ 1.4694209098815918, 0.0046353572979569435, 0.002392316237092018 ], [ 0.022281549870967865, 0.006433307193219662, 1.1441218852996826 ], [ -0.16217999160289764, -0.0005673008854500949, -0.0028868752997368574 ], [ 0.0011142585426568985, 0.036793302744627, 0.06873425096273422 ], [ 0.001964340452104807, -0.004202086944133043, 0.0034294212237000465 ] ] ], "root_positions": [ [ 0.014979152008891106, 0.7896444201469421, 0.8725281357765198 ], [ 0.12546521425247192, 0.30551770329475403, 2.3331315517425537 ] ], "smooth_root_2d": [ [ 0.014979152008891106, 0.8725281357765198 ], [ 0.12546521425247192, 2.3331315517425537 ] ] }, { "type": "right-hand", "frame_indices": [ 40, 155 ], "local_joints_rot": [ [ [ -0.20672118663787842, 0.0004979433142580092, 0.010066316463053226 ], [ 0.0789145976305008, 0.0008333905134350061, -5.267082087812014e-05 ], [ -0.1686924546957016, -0.0027884345036000013, 
0.0520743690431118 ], [ 0.000989485066384077, 0.1385614573955536, 0.0005803265958093107 ], [ 1.0274103879928589, -0.0004089517460670322, 0.0007986496202647686 ], [ -0.39034226536750793, -0.001306047779507935, -4.922552761854604e-05 ], [ 0.0023066187277436256, -0.0007853881106711924, -0.0062883589416742325 ], [ 4.49517356173601e-05, 0.0033443598076701164, -0.0014551420463249087 ], [ 0.07268467545509338, -0.0011258760932832956, -3.953919076593593e-05 ], [ -0.1719113141298294, 0.018712127581238747, 0.06082615628838539 ], [ 0.0011432868195697665, 0.02744375728070736, 0.0025501118507236242 ], [ 0.41685307025909424, -0.002692570211365819, -0.0006283970433287323 ], [ -0.1283608227968216, 0.0030534265097230673, 0.00016949126438703388 ], [ -0.005590266548097134, 0.0014076301595196128, -0.038615260273218155 ], [ -0.00013014793512411416, 0.001360177993774414, 6.41088408883661e-05 ], [ 0.00010043015936389565, -0.01370090153068304, -0.00014910128084011376 ], [ 0.00023336269077844918, 0.0025421029422432184, 0.04833226650953293 ], [ 0.056574925780296326, 0.0006874562823213637, 0.0004548647266346961 ], [ -0.37481847405433655, -0.054357241839170456, 0.2803272306919098 ], [ 0.0013725318713113666, 0.009074348025023937, -0.0021504403557628393 ], [ -0.0012184121878817677, -0.4267229437828064, 0.011203057132661343 ], [ 1.255251407623291, 0.0009449978824704885, 0.0010158077348023653 ], [ -0.003570390399545431, -0.003947308287024498, -0.5030224323272705 ], [ 0.1881941556930542, -0.000495748536195606, 0.0016725400928407907 ], [ -0.002223622752353549, 0.11821465194225311, 0.007546884939074516 ], [ -0.00137770373839885, -0.0031452146358788013, -0.0015015294775366783 ], [ -0.3751647472381592, 0.05314668267965317, -0.28086331486701965 ], [ -0.007756246719509363, -0.016310883685946465, -0.02847120724618435 ], [ -0.0002517815155442804, 0.427451491355896, 3.640262002591044e-05 ], [ 1.2455408573150635, -0.0014789876295253634, 0.0008519256953150034 ], [ 0.004311776254326105, 0.009671058505773544, 0.5968337655067444 ], [ 0.1335560381412506, 0.0011528844479471445, -0.0008361327927559614 ], [ 0.001167859067209065, -0.1551152616739273, 0.00019725598394870758 ], [ -0.0014258474111557007, 0.0034801543224602938, 0.0009809854673221707 ] ], [ [ -0.047659896314144135, -0.11130385845899582, -0.0020901868119835854 ], [ -1.5705475807189941, -0.0014125468442216516, -0.0008221857133321464 ], [ -0.16147980093955994, 0.014729475602507591, 0.4458121657371521 ], [ -0.00045561062870547175, -0.1160486489534378, -0.006125911604613066 ], [ 2.811251401901245, 0.0016747766640037298, -0.005349006038159132 ], [ -0.8591147065162659, 0.0037903853226453066, 0.00048354381578974426 ], [ 0.006445891689509153, -0.0036706889513880014, -0.03472399711608887 ], [ -0.001481462037190795, 0.0015367366140708327, -0.0015593112912029028 ], [ -1.5751848220825195, 0.001112997648306191, 0.0009848373010754585 ], [ -0.16862420737743378, -0.016877643764019012, -0.26229384541511536 ], [ -9.055795817403123e-05, 0.09453120082616806, -0.0134742371737957 ], [ 2.811314344406128, 0.003919574897736311, 0.005575981922447681 ], [ -0.8299098014831543, -0.003791244002059102, 0.0012802339624613523 ], [ 0.005852710455656052, 0.005849692039191723, 0.1632416546344757 ], [ -0.0015579514438286424, 9.288851288147271e-05, 0.001196552417241037 ], [ 0.00043879495933651924, 0.04429133981466293, 0.0002551022043917328 ], [ -0.0019886596128344536, 0.008745947852730751, -0.00962099153548479 ], [ 0.5197923183441162, -0.0010678194230422378, 0.0002590256044641137 ], [ -0.9051622152328491, 
-0.12138096988201141, 0.25749173760414124 ], [ 0.010689850896596909, -0.01072163600474596, 0.20382197201251984 ], [ -0.0009684870601631701, -0.5894762873649597, 0.0032688004430383444 ], [ 1.30536949634552, -0.002206705743446946, -0.0020471925381571054 ], [ 0.0067055909894406796, -0.015674468129873276, -0.9086763262748718 ], [ -0.26612186431884766, -0.00016191616305150092, 0.002851327648386359 ], [ 0.003539646975696087, 0.20451955497264862, -0.02575569413602352 ], [ 0.003367731347680092, 0.0018452388467267156, -0.00026573429931886494 ], [ -0.9464634656906128, 0.12737642228603363, -0.2577688992023468 ], [ 0.00046661958913318813, -0.008693858049809933, -0.19606870412826538 ], [ -0.0058177076280117035, 0.6349377036094666, -0.0003108184027951211 ], [ 1.4694209098815918, 0.0046353572979569435, 0.002392316237092018 ], [ 0.022281549870967865, 0.006433307193219662, 1.1441218852996826 ], [ -0.16217999160289764, -0.0005673008854500949, -0.0028868752997368574 ], [ 0.0011142585426568985, 0.036793302744627, 0.06873425096273422 ], [ 0.001964340452104807, -0.004202086944133043, 0.0034294212237000465 ] ] ], "root_positions": [ [ 0.014979152008891106, 0.7896444201469421, 0.8725281357765198 ], [ 0.12546521425247192, 0.30551770329475403, 2.3331315517425537 ] ], "smooth_root_2d": [ [ 0.014979152008891106, 0.8725281357765198 ], [ 0.12546521425247192, 2.3331315517425537 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/02_multi_text_ee_constraint/meta.json ================================================ { "texts": [ "A person walks forward while carrying a box", "A person sets a box down onto the ground" ], "durations": [ 3.533333333333333, 4.066666666666666 ], "num_samples": 1, "seed": 60, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 1.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/03_full_body_keyframes/constraints.json ================================================ [ { "type": "fullbody", "frame_indices": [ 59, 106, 148 ], "local_joints_rot": [ [ [ 0.42420727014541626, 0.058721136301755905, -0.1945635825395584 ], [ -0.5268475413322449, -0.0005157420528121293, 0.0004701620200648904 ], [ -0.17267920076847076, 0.027239520102739334, 0.36560261249542236 ], [ 0.004160718061029911, -0.22976335883140564, 0.010524176992475986 ], [ 1.5825881958007812, -0.01814083196222782, -0.00019598894868977368 ], [ -0.8827329277992249, 0.009902671910822392, -0.00021610780095215887 ], [ 0.0067768096923828125, -0.013547217473387718, -0.16673408448696136 ], [ 0.0006806282908655703, 0.004601094871759415, -0.0043960982002317905 ], [ -1.4894901514053345, -0.003371267579495907, -0.001970127457752824 ], [ -0.17904962599277496, 0.004051337484270334, 0.19225701689720154 ], [ -0.0033012183848768473, -0.29656991362571716, 0.004984850063920021 ], [ 1.5931552648544312, -0.007282367907464504, -0.0052862209267914295 ], [ -0.35364261269569397, 0.0049067274667322636, 0.0010333984391763806 ], [ 0.0023804877419024706, -0.005421861540526152, -0.19129839539527893 ], [ 0.0008946731686592102, 0.0049979668110609055, -0.0008540445705875754 ], [ -0.00037546976818703115, -0.09826900064945221, 0.0006841858848929405 ], [ 0.004415650386363268, 0.0112489964812994, 0.025344429537653923 ], [ 0.5182019472122192, 0.002875699894502759, 0.002064053900539875 ], [ -0.7899102568626404, -0.11301380395889282, 0.261331170797348 ], [ -0.004763631150126457, 0.003188431030139327, 0.191846564412117 ], [ 
-0.0006821855786256492, -0.24938665330410004, 0.0013275814708322287 ], [ 1.1367335319519043, 0.0038948820438236, 0.0009569167159497738 ], [ 0.006261332891881466, 0.020894864574074745, -1.050469160079956 ], [ 0.06118401885032654, 0.0005131644429638982, 0.00042430072790011764 ], [ 0.0017778673209249973, 0.08777552843093872, -0.044312309473752975 ], [ -0.0006084830965846777, 0.0022449076641350985, -0.001873409142717719 ], [ 0.33878403902053833, -0.04740850627422333, -0.2796333432197571 ], [ 0.02221747301518917, 0.013649695552885532, -0.11847231537103653 ], [ 0.007714178413152695, 0.6182990074157715, 0.009067214094102383 ], [ 0.8923805952072144, -0.00016622581460978836, 0.0021162345074117184 ], [ 0.0038995807990431786, -0.006832453887909651, 0.3025287687778473 ], [ 0.03307999297976494, 0.0005516205565072596, 0.0009820020059123635 ], [ 0.0015379488468170166, -0.08221427351236343, -0.014401843771338463 ], [ -0.00022057670867070556, 0.002010792726650834, 0.0012923656031489372 ] ], [ [ -0.08197958767414093, 0.10326994955539703, -0.1510602980852127 ], [ 0.28157129883766174, 0.0011461800895631313, 0.000703590689226985 ], [ -0.182321235537529, 0.05269569158554077, 0.2730983793735504 ], [ -0.0003947282093577087, 0.09641454368829727, 0.0040251282043755054 ], [ 1.089223861694336, -0.00700604822486639, -0.002539312234148383 ], [ -0.09248486906290054, 0.003849609522148967, 0.0016473153373226523 ], [ -0.010541710071265697, 0.004344945307821035, 0.07663393765687943 ], [ -0.00044715296826325357, -0.004340745974332094, 0.007171581499278545 ], [ -0.3379390239715576, 0.0015806800220161676, -0.0003471111413091421 ], [ -0.1781967729330063, 0.016616491600871086, 0.1652776598930359 ], [ -0.002019439358264208, -0.11581386625766754, 0.0009603232610970736 ], [ 0.6794841289520264, -5.403390241554007e-05, -0.0012657493352890015 ], [ -0.09013757854700089, 0.0018549489323049784, -0.000238976048422046 ], [ -0.0009166855015791953, -0.0007138565997593105, -0.0742788091301918 ], [ -0.0009655999601818621, 0.0029521933756768703, -0.00039851426845416427 ], [ -0.0006129079265519977, -0.19495022296905518, -0.0019512351136654615 ], [ 0.0019297772087156773, -0.0025066917296499014, 0.1518552601337433 ], [ 0.18073193728923798, -0.0008597049745731056, 0.00023304206843022257 ], [ -0.19048453867435455, -0.02173178642988205, 0.2785468101501465 ], [ 0.0032724339980632067, 0.001481848070397973, 0.00837984960526228 ], [ 0.0037242062389850616, -0.19455766677856445, 0.009616612456738949 ], [ -0.19767794013023376, 0.004192049615085125, 0.004219892434775829 ], [ -0.018522148951888084, 0.01758752018213272, -1.4997444152832031 ], [ -0.07066819816827774, -0.0006776255904696882, 0.00122307357378304 ], [ 0.007704276591539383, 0.14503517746925354, 0.0951184555888176 ], [ 0.004533262457698584, -0.0066575342789292336, -0.010643035173416138 ], [ 0.3773331642150879, -0.05414784327149391, -0.2780730128288269 ], [ 0.003753547091037035, 0.002539943205192685, 0.12321871519088745 ], [ -0.004724413156509399, 0.46992960572242737, 0.001832474721595645 ], [ 1.2976007461547852, 0.0007234009681269526, -0.001626322278752923 ], [ -0.0016050372505560517, -0.00880438182502985, 0.17947044968605042 ], [ 0.05334911122918129, -0.00018671243742574006, 0.0010833276901394129 ], [ -0.0015367609448730946, -0.05425700917840004, 0.01668459363281727 ], [ -0.00021225935779511929, 0.001713683595880866, 0.0009809889597818255 ] ], [ [ -0.21817633509635925, -0.012708673253655434, -0.029821090400218964 ], [ 0.3743710219860077, 0.0007941523799672723, 0.00032366320374421775 ], [ 
-0.16750676929950714, 0.003018906805664301, 0.07928019016981125 ], [ -0.0003895726113114506, 0.030501781031489372, 0.0013912678696215153 ], [ 0.2578306794166565, -0.0026517061050981283, -0.0001139347514254041 ], [ -0.227533221244812, 0.0004564583650790155, -0.0004175934591330588 ], [ -0.0015815469669178128, 0.0026496825739741325, -0.017801448702812195 ], [ 0.00024288007989525795, 0.000392801477573812, -2.9845070457668044e-05 ], [ 0.31938642263412476, -0.0006790655897930264, -0.0004276619874872267 ], [ -0.17199693620204926, 0.007707139942795038, 0.014987054280936718 ], [ 0.0012992072151973844, 0.003620905103161931, -0.001210421440191567 ], [ 0.22853288054466248, -0.0018797506345435977, -0.0002660619793459773 ], [ -0.1335543692111969, 0.0010313205420970917, 0.0001083972238120623 ], [ 0.003520265920087695, 0.0035283963661640882, 0.016698163002729416 ], [ 0.0001443400833522901, -0.001745356246829033, -2.3336755475611426e-05 ], [ 0.0003554633294697851, -0.05629483610391617, -0.0006463310564868152 ], [ -0.00298635708168149, 0.0020182463340461254, -0.03614736720919609 ], [ 0.21955031156539917, 0.0005465149879455566, 0.00011243963672313839 ], [ -0.0715053528547287, -0.010282701812684536, 0.28057143092155457 ], [ 0.0007245761225931346, 0.0019379559671506286, -0.018530432134866714 ], [ -0.0020012110471725464, -0.5585712194442749, 0.0002525273594073951 ], [ 1.1451164484024048, 0.000756395107600838, -0.00042264885269105434 ], [ -0.004087591078132391, -0.0022635578643530607, -0.1811828911304474 ], [ 0.15393203496932983, -0.00010327681229682639, 0.000951180059928447 ], [ -0.0005707733216695487, 0.07005079090595245, -0.0018504050094634295 ], [ -0.0013123765820637345, -0.0004375300486572087, 0.0002970081695821136 ], [ -0.09115279465913773, 0.013008617796003819, -0.2808595299720764 ], [ 0.0015214721206575632, -0.007811791729182005, 0.031220799311995506 ], [ -0.00048553026863373816, 0.5777612328529358, 0.0003351669874973595 ], [ 1.0913182497024536, 0.0011191898956894875, -0.0027903772424906492 ], [ 0.000775794149376452, 0.00010774911061162129, 0.10287072509527206 ], [ 0.0997936949133873, 0.0003015398688148707, -0.0006937433499842882 ], [ 0.0003619014751166105, -0.18787385523319244, -0.0010270585771650076 ], [ -0.001584835583344102, 0.0037561857607215643, -0.002414965769276023 ] ] ], "root_positions": [ [ -0.17535515129566193, 0.5689253807067871, 0.9417929649353027 ], [ -0.16934014856815338, 0.7382326722145081, 2.169330596923828 ], [ -0.1823902279138565, 0.7819305658340454, 2.954490900039673 ] ], "smooth_root_2d": [ [ -0.17535515129566193, 0.9417929649353027 ], [ -0.16934014856815338, 2.169330596923828 ], [ -0.1823902279138565, 2.954490900039673 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/03_full_body_keyframes/meta.json ================================================ { "text": "A person walking forward picks up something off the ground", "duration": 5.0, "num_samples": 1, "seed": 51, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 1.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/04_ee_constraint/constraints.json ================================================ [ { "type": "right-hand", "frame_indices": [ 129, 93, 0 ], "local_joints_rot": [ [ [ -0.11223886162042618, 0.20531758666038513, 0.13568778336048126 ], [ 0.1075688898563385, 0.0032202948350459337, 0.0006892754463478923 ], [ -0.17058254778385162, -0.011657492257654667, -0.23103317618370056 ], [ 
-0.02866872400045395, 0.4262913167476654, -0.010209682397544384 ], [ 0.2924644649028778, 0.007188746705651283, 0.0005000674282200634 ], [ -0.13080132007598877, -0.0029640060383826494, -0.0007075564353726804 ], [ -0.005761375650763512, -0.002191383158788085, 0.15397773683071136 ], [ -0.00023041102394927293, -0.0010889451950788498, 0.0007837787852622569 ], [ -0.3537895977497101, -0.0006238390924409032, -0.0010272490326315165 ], [ -0.16032733023166656, -0.02506295032799244, -0.22620464861392975 ], [ 0.0138308797031641, 0.21655774116516113, 0.0317748561501503 ], [ 1.5745534896850586, 0.003732866607606411, 0.0021063678432255983 ], [ -0.17066748440265656, -0.002285068854689598, -0.0029538189992308617 ], [ 0.02313886024057865, -0.07020875811576843, -0.05658446252346039 ], [ 2.5580025976523757e-05, 0.004435115493834019, -0.006514436099678278 ], [ 0.0015886364271864295, -0.292732834815979, -0.0014166575856506824 ], [ -0.008558829315006733, 0.0066470191814005375, -0.010221566073596478 ], [ 0.10141321271657944, -0.0028386565390974283, -0.0006978976307436824 ], [ 0.002506372518837452, 0.001101600006222725, 0.2779805362224579 ], [ 0.020367039367556572, -0.028616085648536682, 0.0971180647611618 ], [ -0.011572631075978279, -0.5930124521255493, -0.026975814253091812 ], [ 0.9286840558052063, -0.00046807233593426645, -0.00013331411173567176 ], [ -0.031172338873147964, -0.04484722763299942, 0.03643424063920975 ], [ 0.03150894120335579, -0.00101278827060014, 0.0015338404336944222 ], [ 0.0005915925721637905, 0.0930531769990921, -0.028835415840148926 ], [ -0.001440802589058876, 0.0010614224011078477, 0.0006542576011270285 ], [ -0.4149414598941803, 0.06656259298324585, -0.2730332314968109 ], [ -0.006371266208589077, -0.02150307223200798, -1.3590242862701416 ], [ 0.00956002902239561, -0.17155548930168152, 0.026624836027622223 ], [ 0.8084958791732788, -0.003991501871496439, 0.0007233448559418321 ], [ -0.020737944170832634, -0.011397535912692547, 0.14019189774990082 ], [ -0.18261606991291046, 0.005134414881467819, -0.001045998651534319 ], [ -0.028427572920918465, -0.6557883620262146, 0.038063470274209976 ], [ 0.005555277690291405, 0.012246276251971722, 0.004770371131598949 ] ], [ [ -0.06392758339643478, 0.3478183448314667, 0.1171446293592453 ], [ 0.12243298441171646, 0.003146131755784154, 0.00017438907525502145 ], [ -0.17841783165931702, -0.0256511103361845, -0.2805330455303192 ], [ -0.022625330835580826, 0.348234087228775, -0.009928824380040169 ], [ 0.28284141421318054, 0.009495020844042301, 0.0010556986089795828 ], [ -0.17478667199611664, -0.004891794174909592, -0.0013969563879072666 ], [ -0.002641322324052453, -0.005833400413393974, 0.20226475596427917 ], [ -0.0009078677394427359, -0.002073301700875163, 0.0012749496381729841 ], [ -0.48070675134658813, 0.0005347213009372354, -0.0004243548901285976 ], [ -0.16694584488868713, -0.03390314802527428, -0.09055406600236893 ], [ 0.009182179346680641, 0.1743844896554947, 0.01932411640882492 ], [ 1.6481772661209106, 0.0002097517135553062, 0.0010239556431770325 ], [ -0.17133140563964844, 0.0028362423181533813, -0.004689408931881189 ], [ 0.025385459885001183, -0.06771048158407211, -0.011561849154531956 ], [ -0.00012663791130762547, 0.001872184220701456, -0.002834505634382367 ], [ 0.001523697399534285, -0.48211750388145447, -0.0005278618773445487 ], [ -0.00822246354073286, -0.00923906546086073, -0.01643195189535618 ], [ 0.04035002365708351, -0.004922393709421158, -0.0005214703269302845 ], [ -0.02120170183479786, -0.000465662480564788, 0.27964550256729126 ], [ 
0.042349521070718765, -0.043123405426740646, 0.21025802195072174 ], [ -0.01620035618543625, -0.5838293433189392, -0.03403719887137413 ], [ 1.1832103729248047, 0.0004754749243147671, -0.0014872060855850577 ], [ -0.040768858045339584, -0.04618615657091141, 0.04847611486911774 ], [ 0.04482508823275566, -0.0005392982857301831, 0.00035259113064967096 ], [ 0.00015537742001470178, -0.024237608537077904, -0.003044326091185212 ], [ -0.0012453795643523335, 0.004743263591080904, 0.004625802394002676 ], [ -0.14595142006874084, 0.0308919008821249, -0.2779163420200348 ], [ -0.03314027562737465, -0.07205720245838165, -1.3401029109954834 ], [ 0.02448190003633499, -0.468079537153244, 0.018310735002160072 ], [ 0.9222347140312195, -0.00624655419960618, -0.0003706512216012925 ], [ 0.0311859343200922, -0.01980999857187271, -0.4311404228210449 ], [ -0.05887744575738907, 0.009565972723066807, 0.0008855919586494565 ], [ -0.0638674795627594, -1.1882448196411133, -0.07744041085243225 ], [ 0.002320833969861269, 0.014880148693919182, 0.00827236007899046 ] ], [ [ 0.028708748519420624, 0.023731501772999763, -0.05906220152974129 ], [ 0.36697518825531006, 0.0020822372753173113, 9.442192094866186e-06 ], [ -0.17328320443630219, -0.029694421216845512, -0.2592017650604248 ], [ -0.027558816596865654, 0.44522055983543396, 0.00263651879504323 ], [ 0.45747342705726624, 0.006375299766659737, 0.000838644162286073 ], [ -0.29932498931884766, -0.0034287264570593834, -0.005712746176868677 ], [ 0.010242770425975323, 0.0686849057674408, 0.12300582230091095 ], [ 0.0019906593952327967, -0.006487288512289524, 0.004740884527564049 ], [ -0.417245090007782, 0.002172173699364066, -0.000527464144397527 ], [ -0.16229933500289917, -0.015825729817152023, 0.26093363761901855 ], [ -0.01547759398818016, -0.4560239017009735, -0.001296655391342938 ], [ 0.520811140537262, -0.016100304201245308, -0.0033653294667601585 ], [ -0.061035193502902985, 0.013747301883995533, 0.0011975782690569758 ], [ 0.002211581217125058, 0.013100380077958107, -0.41168421506881714 ], [ 0.000723487522918731, -0.0009448538185097277, -0.0023157261312007904 ], [ -0.0008414603653363883, -0.22904154658317566, 0.0037871438544243574 ], [ -0.004434449132531881, -0.0019493037834763527, 0.04349867254495621 ], [ 0.11113234609365463, -0.001496539101935923, -6.745033260813216e-06 ], [ 0.03568394109606743, 0.00850191805511713, 0.2815527021884918 ], [ 0.007574420887976885, -0.005988026969134808, -0.04585442319512367 ], [ -0.014899174682796001, -0.6360949277877808, 0.014495083130896091 ], [ 1.1318601369857788, -0.0009174949955195189, -0.008180576376616955 ], [ -0.038145799189805984, -0.05923198536038399, -0.04122990742325783 ], [ 0.07719366252422333, -0.0010574767366051674, 0.0009220906649716198 ], [ -0.0010063578374683857, 0.12876589596271515, -0.021289559081196785 ], [ -0.0008511252817697823, -0.0003541657351888716, -0.0006832815706729889 ], [ 0.10374817997217178, -0.014772959984838963, -0.28235113620758057 ], [ 0.029763568192720413, 0.00017807059339247644, 0.007343007251620293 ], [ -0.0064206854440271854, 0.3665950298309326, -0.0003897137939929962 ], [ 1.0820642709732056, -0.0005379249923862517, -0.0039028781466186047 ], [ -0.004170380067080259, 0.06480656564235687, -0.10721305757761002 ], [ -0.06350508332252502, 0.0011865347623825073, -0.0005369586870074272 ], [ -0.0021817537490278482, -0.08756759762763977, -0.008148521184921265 ], [ 0.00243115471675992, -0.003949992824345827, 0.005949904676526785 ] ] ], "root_positions": [ [ 2.639763593673706, 0.767427384853363, 2.341259479522705 ], [ 
1.9461809396743774, 0.7706995010375977, 1.7243560552597046 ], [ 0.003315839683637023, 0.7415399551391602, -0.0012030001962557435 ] ], "smooth_root_2d": [ [ 2.639763593673706, 2.341259479522705 ], [ 1.9461809396743774, 1.7243560552597046 ], [ 0.003315839683637023, -0.0012030001962557435 ] ] }, { "type": "left-foot", "frame_indices": [ 93, 0 ], "local_joints_rot": [ [ [ -0.06392758339643478, 0.3478183448314667, 0.1171446293592453 ], [ 0.12243298441171646, 0.003146131755784154, 0.00017438907525502145 ], [ -0.17841783165931702, -0.0256511103361845, -0.2805330455303192 ], [ -0.022625330835580826, 0.348234087228775, -0.009928824380040169 ], [ 0.28284141421318054, 0.009495020844042301, 0.0010556986089795828 ], [ -0.17478667199611664, -0.004891794174909592, -0.0013969563879072666 ], [ -0.002641322324052453, -0.005833400413393974, 0.20226475596427917 ], [ -0.0009078677394427359, -0.002073301700875163, 0.0012749496381729841 ], [ -0.48070675134658813, 0.0005347213009372354, -0.0004243548901285976 ], [ -0.16694584488868713, -0.03390314802527428, -0.09055406600236893 ], [ 0.009182179346680641, 0.1743844896554947, 0.01932411640882492 ], [ 1.6481772661209106, 0.0002097517135553062, 0.0010239556431770325 ], [ -0.17133140563964844, 0.0028362423181533813, -0.004689408931881189 ], [ 0.025385459885001183, -0.06771048158407211, -0.011561849154531956 ], [ -0.00012663791130762547, 0.001872184220701456, -0.002834505634382367 ], [ 0.001523697399534285, -0.48211750388145447, -0.0005278618773445487 ], [ -0.00822246354073286, -0.00923906546086073, -0.01643195189535618 ], [ 0.04035002365708351, -0.004922393709421158, -0.0005214703269302845 ], [ -0.02120170183479786, -0.000465662480564788, 0.27964550256729126 ], [ 0.042349521070718765, -0.043123405426740646, 0.21025802195072174 ], [ -0.01620035618543625, -0.5838293433189392, -0.03403719887137413 ], [ 1.1832103729248047, 0.0004754749243147671, -0.0014872060855850577 ], [ -0.040768858045339584, -0.04618615657091141, 0.04847611486911774 ], [ 0.04482508823275566, -0.0005392982857301831, 0.00035259113064967096 ], [ 0.00015537742001470178, -0.024237608537077904, -0.003044326091185212 ], [ -0.0012453795643523335, 0.004743263591080904, 0.004625802394002676 ], [ -0.14595142006874084, 0.0308919008821249, -0.2779163420200348 ], [ -0.03314027562737465, -0.07205720245838165, -1.3401029109954834 ], [ 0.02448190003633499, -0.468079537153244, 0.018310735002160072 ], [ 0.9222347140312195, -0.00624655419960618, -0.0003706512216012925 ], [ 0.0311859343200922, -0.01980999857187271, -0.4311404228210449 ], [ -0.05887744575738907, 0.009565972723066807, 0.0008855919586494565 ], [ -0.0638674795627594, -1.1882448196411133, -0.07744041085243225 ], [ 0.002320833969861269, 0.014880148693919182, 0.00827236007899046 ] ], [ [ 0.028708748519420624, 0.023731501772999763, -0.05906220152974129 ], [ 0.36697518825531006, 0.0020822372753173113, 9.442192094866186e-06 ], [ -0.17328320443630219, -0.029694421216845512, -0.2592017650604248 ], [ -0.027558816596865654, 0.44522055983543396, 0.00263651879504323 ], [ 0.45747342705726624, 0.006375299766659737, 0.000838644162286073 ], [ -0.29932498931884766, -0.0034287264570593834, -0.005712746176868677 ], [ 0.010242770425975323, 0.0686849057674408, 0.12300582230091095 ], [ 0.0019906593952327967, -0.006487288512289524, 0.004740884527564049 ], [ -0.417245090007782, 0.002172173699364066, -0.000527464144397527 ], [ -0.16229933500289917, -0.015825729817152023, 0.26093363761901855 ], [ -0.01547759398818016, -0.4560239017009735, -0.001296655391342938 ], [ 0.520811140537262, 
-0.016100304201245308, -0.0033653294667601585 ], [ -0.061035193502902985, 0.013747301883995533, 0.0011975782690569758 ], [ 0.002211581217125058, 0.013100380077958107, -0.41168421506881714 ], [ 0.000723487522918731, -0.0009448538185097277, -0.0023157261312007904 ], [ -0.0008414603653363883, -0.22904154658317566, 0.0037871438544243574 ], [ -0.004434449132531881, -0.0019493037834763527, 0.04349867254495621 ], [ 0.11113234609365463, -0.001496539101935923, -6.745033260813216e-06 ], [ 0.03568394109606743, 0.00850191805511713, 0.2815527021884918 ], [ 0.007574420887976885, -0.005988026969134808, -0.04585442319512367 ], [ -0.014899174682796001, -0.6360949277877808, 0.014495083130896091 ], [ 1.1318601369857788, -0.0009174949955195189, -0.008180576376616955 ], [ -0.038145799189805984, -0.05923198536038399, -0.04122990742325783 ], [ 0.07719366252422333, -0.0010574767366051674, 0.0009220906649716198 ], [ -0.0010063578374683857, 0.12876589596271515, -0.021289559081196785 ], [ -0.0008511252817697823, -0.0003541657351888716, -0.0006832815706729889 ], [ 0.10374817997217178, -0.014772959984838963, -0.28235113620758057 ], [ 0.029763568192720413, 0.00017807059339247644, 0.007343007251620293 ], [ -0.0064206854440271854, 0.3665950298309326, -0.0003897137939929962 ], [ 1.0820642709732056, -0.0005379249923862517, -0.0039028781466186047 ], [ -0.004170380067080259, 0.06480656564235687, -0.10721305757761002 ], [ -0.06350508332252502, 0.0011865347623825073, -0.0005369586870074272 ], [ -0.0021817537490278482, -0.08756759762763977, -0.008148521184921265 ], [ 0.00243115471675992, -0.003949992824345827, 0.005949904676526785 ] ] ], "root_positions": [ [ 1.9461809396743774, 0.7706995010375977, 1.7243560552597046 ], [ 0.003315839683637023, 0.7415399551391602, -0.0012030001962557435 ] ], "smooth_root_2d": [ [ 1.9461809396743774, 1.7243560552597046 ], [ 0.003315839683637023, -0.0012030001962557435 ] ] }, { "type": "right-foot", "frame_indices": [ 0 ], "local_joints_rot": [ [ [ 0.028708748519420624, 0.023731501772999763, -0.05906220152974129 ], [ 0.36697518825531006, 0.0020822372753173113, 9.442192094866186e-06 ], [ -0.17328320443630219, -0.029694421216845512, -0.2592017650604248 ], [ -0.027558816596865654, 0.44522055983543396, 0.00263651879504323 ], [ 0.45747342705726624, 0.006375299766659737, 0.000838644162286073 ], [ -0.29932498931884766, -0.0034287264570593834, -0.005712746176868677 ], [ 0.010242770425975323, 0.0686849057674408, 0.12300582230091095 ], [ 0.0019906593952327967, -0.006487288512289524, 0.004740884527564049 ], [ -0.417245090007782, 0.002172173699364066, -0.000527464144397527 ], [ -0.16229933500289917, -0.015825729817152023, 0.26093363761901855 ], [ -0.01547759398818016, -0.4560239017009735, -0.001296655391342938 ], [ 0.520811140537262, -0.016100304201245308, -0.0033653294667601585 ], [ -0.061035193502902985, 0.013747301883995533, 0.0011975782690569758 ], [ 0.002211581217125058, 0.013100380077958107, -0.41168421506881714 ], [ 0.000723487522918731, -0.0009448538185097277, -0.0023157261312007904 ], [ -0.0008414603653363883, -0.22904154658317566, 0.0037871438544243574 ], [ -0.004434449132531881, -0.0019493037834763527, 0.04349867254495621 ], [ 0.11113234609365463, -0.001496539101935923, -6.745033260813216e-06 ], [ 0.03568394109606743, 0.00850191805511713, 0.2815527021884918 ], [ 0.007574420887976885, -0.005988026969134808, -0.04585442319512367 ], [ -0.014899174682796001, -0.6360949277877808, 0.014495083130896091 ], [ 1.1318601369857788, -0.0009174949955195189, -0.008180576376616955 ], [ -0.038145799189805984, 
-0.05923198536038399, -0.04122990742325783 ], [ 0.07719366252422333, -0.0010574767366051674, 0.0009220906649716198 ], [ -0.0010063578374683857, 0.12876589596271515, -0.021289559081196785 ], [ -0.0008511252817697823, -0.0003541657351888716, -0.0006832815706729889 ], [ 0.10374817997217178, -0.014772959984838963, -0.28235113620758057 ], [ 0.029763568192720413, 0.00017807059339247644, 0.007343007251620293 ], [ -0.0064206854440271854, 0.3665950298309326, -0.0003897137939929962 ], [ 1.0820642709732056, -0.0005379249923862517, -0.0039028781466186047 ], [ -0.004170380067080259, 0.06480656564235687, -0.10721305757761002 ], [ -0.06350508332252502, 0.0011865347623825073, -0.0005369586870074272 ], [ -0.0021817537490278482, -0.08756759762763977, -0.008148521184921265 ], [ 0.00243115471675992, -0.003949992824345827, 0.005949904676526785 ] ] ], "root_positions": [ [ 0.003315839683637023, 0.7415399551391602, -0.0012030001962557435 ] ], "smooth_root_2d": [ [ 0.003315839683637023, -0.0012030001962557435 ] ] }, { "type": "left-hand", "frame_indices": [ 0 ], "local_joints_rot": [ [ [ 0.028708748519420624, 0.023731501772999763, -0.05906220152974129 ], [ 0.36697518825531006, 0.0020822372753173113, 9.442192094866186e-06 ], [ -0.17328320443630219, -0.029694421216845512, -0.2592017650604248 ], [ -0.027558816596865654, 0.44522055983543396, 0.00263651879504323 ], [ 0.45747342705726624, 0.006375299766659737, 0.000838644162286073 ], [ -0.29932498931884766, -0.0034287264570593834, -0.005712746176868677 ], [ 0.010242770425975323, 0.0686849057674408, 0.12300582230091095 ], [ 0.0019906593952327967, -0.006487288512289524, 0.004740884527564049 ], [ -0.417245090007782, 0.002172173699364066, -0.000527464144397527 ], [ -0.16229933500289917, -0.015825729817152023, 0.26093363761901855 ], [ -0.01547759398818016, -0.4560239017009735, -0.001296655391342938 ], [ 0.520811140537262, -0.016100304201245308, -0.0033653294667601585 ], [ -0.061035193502902985, 0.013747301883995533, 0.0011975782690569758 ], [ 0.002211581217125058, 0.013100380077958107, -0.41168421506881714 ], [ 0.000723487522918731, -0.0009448538185097277, -0.0023157261312007904 ], [ -0.0008414603653363883, -0.22904154658317566, 0.0037871438544243574 ], [ -0.004434449132531881, -0.0019493037834763527, 0.04349867254495621 ], [ 0.11113234609365463, -0.001496539101935923, -6.745033260813216e-06 ], [ 0.03568394109606743, 0.00850191805511713, 0.2815527021884918 ], [ 0.007574420887976885, -0.005988026969134808, -0.04585442319512367 ], [ -0.014899174682796001, -0.6360949277877808, 0.014495083130896091 ], [ 1.1318601369857788, -0.0009174949955195189, -0.008180576376616955 ], [ -0.038145799189805984, -0.05923198536038399, -0.04122990742325783 ], [ 0.07719366252422333, -0.0010574767366051674, 0.0009220906649716198 ], [ -0.0010063578374683857, 0.12876589596271515, -0.021289559081196785 ], [ -0.0008511252817697823, -0.0003541657351888716, -0.0006832815706729889 ], [ 0.10374817997217178, -0.014772959984838963, -0.28235113620758057 ], [ 0.029763568192720413, 0.00017807059339247644, 0.007343007251620293 ], [ -0.0064206854440271854, 0.3665950298309326, -0.0003897137939929962 ], [ 1.0820642709732056, -0.0005379249923862517, -0.0039028781466186047 ], [ -0.004170380067080259, 0.06480656564235687, -0.10721305757761002 ], [ -0.06350508332252502, 0.0011865347623825073, -0.0005369586870074272 ], [ -0.0021817537490278482, -0.08756759762763977, -0.008148521184921265 ], [ 0.00243115471675992, -0.003949992824345827, 0.005949904676526785 ] ] ], "root_positions": [ [ 0.003315839683637023, 
0.7415399551391602, -0.0012030001962557435 ] ], "smooth_root_2d": [ [ 0.003315839683637023, -0.0012030001962557435 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/04_ee_constraint/meta.json ================================================ { "text": "A person walks diagonally to the left and waves at someone on their right", "duration": 4.966666666666667, "num_samples": 1, "seed": 44, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/05_root_path/constraints.json ================================================ [ { "type": "root2d", "frame_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180 ], "smooth_root_2d": [ [ -0.024789854884147644, 0.01764228567481041 ], [ -0.019911596551537514, 0.03666473180055618 ], [ -0.015032900497317314, 0.05568705126643181 ], [ -0.010153300128877163, 0.07470902800559998 ], [ -0.005272198934108019, 0.09373034536838531 ], [ -0.00038888092967681587, 0.11275061219930649 ], [ 0.004497467540204525, 0.1317693293094635 ], [ 0.009387745521962643, 0.15078598260879517 ], [ 0.014282921329140663, 0.16979998350143433 ], [ 0.019184017553925514, 0.18881070613861084 ], [ 0.024092093110084534, 0.20781749486923218 ], [ 0.029008235782384872, 0.226819708943367 ], [ 0.033933546394109726, 0.24581670761108398 ], [ 0.038869116455316544, 0.2648078203201294 ], [ 0.04381602630019188, 0.2837924659252167 ], [ 0.048775337636470795, 0.30277004837989807 ], [ 0.05374806746840477, 0.321740061044693 ], [ 0.058735184371471405, 0.3407020568847656 ], [ 0.06373759359121323, 0.35965561866760254 ], [ 0.06875615566968918, 0.37860047817230225 ], [ 0.07379162311553955, 0.3975364565849304 ], [ 0.07884468138217926, 0.4164634943008423 ], [ 0.08391592651605606, 0.43538162112236023 ], [ 0.08900584280490875, 0.45429113507270813 ], [ 0.09411482512950897, 0.47319236397743225 ], [ 0.0992431491613388, 0.49208587408065796 ], [ 0.10439097136259079, 0.5109724998474121 ], [ 0.1095583438873291, 0.5298531651496887 ], [ 0.11474518477916718, 0.5487290620803833 ], [ 0.11995130032300949, 0.5676016807556152 ], [ 0.12517637014389038, 0.5864726901054382 ], [ 0.13041996955871582, 0.6053440570831299 ], [ 0.13568153977394104, 0.6242179274559021 ], [ 0.1409604400396347, 0.6430967450141907 ], [ 0.14625589549541473, 0.6619831919670105 ], [ 0.15156707167625427, 0.6808802485466003 ], [ 0.15689301490783691, 0.6997910141944885 ], [ 0.16223272681236267, 0.7187188267707825 ], [ 0.16759774088859558, 0.7376715540885925 ], [ 0.17303690314292908, 0.7566697001457214 ], [ 0.17862369120121002, 0.7757418751716614 ], [ 0.1844315379858017, 0.7949170470237732 ], [ 0.19053390622138977, 0.8142240643501282 ], [ 
0.19700415432453156, 0.8336920142173767 ], [ 0.20391567051410675, 0.8533498644828796 ], [ 0.21134179830551147, 0.8732268214225769 ], [ 0.21935580670833588, 0.8933521509170532 ], [ 0.22803090512752533, 0.9137551784515381 ], [ 0.23744019865989685, 0.9344654083251953 ], [ 0.24765664339065552, 0.9555124640464783 ], [ 0.2587530016899109, 0.9769262671470642 ], [ 0.2708017826080322, 0.9987370371818542 ], [ 0.2838752567768097, 1.0209753513336182 ], [ 0.29804527759552, 1.0436722040176392 ], [ 0.3133833110332489, 1.0668591260910034 ], [ 0.32996034622192383, 1.0905684232711792 ], [ 0.3478468656539917, 1.1148326396942139 ], [ 0.36711281538009644, 1.1396855115890503 ], [ 0.3878275454044342, 1.1651611328125 ], [ 0.41000601649284363, 1.1912426948547363 ], [ 0.4336090087890625, 1.2178623676300049 ], [ 0.45859649777412415, 1.24495267868042 ], [ 0.4849279224872589, 1.272446632385254 ], [ 0.5125620365142822, 1.300277590751648 ], [ 0.5414570569992065, 1.3283785581588745 ], [ 0.571570634841919, 1.3566826581954956 ], [ 0.6028600931167603, 1.3851218223571777 ], [ 0.6352822780609131, 1.4136276245117188 ], [ 0.6687941551208496, 1.4421300888061523 ], [ 0.7033523917198181, 1.4705579280853271 ], [ 0.7389140725135803, 1.4988375902175903 ], [ 0.7754364013671875, 1.5268937349319458 ], [ 0.8128772974014282, 1.554648518562317 ], [ 0.8511953353881836, 1.5820214748382568 ], [ 0.8903500437736511, 1.6089295148849487 ], [ 0.930302083492279, 1.6352869272232056 ], [ 0.9710133075714111, 1.6610050201416016 ], [ 1.0124471187591553, 1.685992956161499 ], [ 1.0545682907104492, 1.7101572751998901 ], [ 1.0973432064056396, 1.7334026098251343 ], [ 1.1407400369644165, 1.755631923675537 ], [ 1.1847283840179443, 1.7767466306686401 ], [ 1.229279637336731, 1.7966474294662476 ], [ 1.2743664979934692, 1.8152343034744263 ], [ 1.3199630975723267, 1.8324071168899536 ], [ 1.3660447597503662, 1.848065733909607 ], [ 1.4125876426696777, 1.8621103763580322 ], [ 1.4595685005187988, 1.8744415044784546 ], [ 1.5069485902786255, 1.8850340843200684 ], [ 1.5546728372573853, 1.8939374685287476 ], [ 1.6026861667633057, 1.9012004137039185 ], [ 1.650932788848877, 1.9068700075149536 ], [ 1.6993565559387207, 1.9109913110733032 ], [ 1.7479000091552734, 1.9136062860488892 ], [ 1.7965046167373657, 1.9147534370422363 ], [ 1.8451100587844849, 1.9144660234451294 ], [ 1.893654465675354, 1.9127724170684814 ], [ 1.942073941230774, 1.9096946716308594 ], [ 1.990302324295044, 1.9052486419677734 ], [ 2.03827166557312, 1.8994430303573608 ], [ 2.0859110355377197, 1.8922799825668335 ], [ 2.133148193359375, 1.8837546110153198 ], [ 2.179908037185669, 1.8738549947738647 ], [ 2.2261133193969727, 1.862563133239746 ], [ 2.27168607711792, 1.8498553037643433 ], [ 2.316545248031616, 1.8357020616531372 ], [ 2.360609769821167, 1.8200697898864746 ], [ 2.403796911239624, 1.8029208183288574 ], [ 2.44602370262146, 1.7842146158218384 ], [ 2.4872069358825684, 1.7639081478118896 ], [ 2.5272626876831055, 1.7419570684432983 ], [ 2.566108465194702, 1.7183157205581665 ], [ 2.603734254837036, 1.693010687828064 ], [ 2.640204906463623, 1.6661417484283447 ], [ 2.6755847930908203, 1.6378077268600464 ], [ 2.7099392414093018, 1.6081076860427856 ], [ 2.743333101272583, 1.5771397352218628 ], [ 2.7758309841156006, 1.5450016260147095 ], [ 2.80749773979187, 1.5117899179458618 ], [ 2.8383967876434326, 1.477600336074829 ], [ 2.868591785430908, 1.4425268173217773 ], [ 2.8981447219848633, 1.4066622257232666 ], [ 2.9271178245544434, 1.3700973987579346 ], [ 2.9555718898773193, 1.3329222202301025 ], [ 2.983566999435425, 
1.2952247858047485 ], [ 3.011162757873535, 1.2570923566818237 ], [ 3.038418769836426, 1.2186110019683838 ], [ 3.0653929710388184, 1.1798664331436157 ], [ 3.092144250869751, 1.1409443616867065 ], [ 3.118730306625366, 1.1019304990768433 ], [ 3.1451311111450195, 1.062860131263733 ], [ 3.171248197555542, 1.0237183570861816 ], [ 3.1969823837280273, 0.9844915866851807 ], [ 3.222233295440674, 0.945167064666748 ], [ 3.246898889541626, 0.905733585357666 ], [ 3.270875930786133, 0.8661811947822571 ], [ 3.294057846069336, 0.826501190662384 ], [ 3.3163373470306396, 0.7866860032081604 ], [ 3.3376033306121826, 0.7467291951179504 ], [ 3.357743263244629, 0.7066251039505005 ], [ 3.3766419887542725, 0.6663689613342285 ], [ 3.394181966781616, 0.6259563565254211 ], [ 3.4102442264556885, 0.5853835344314575 ], [ 3.424708127975464, 0.5446467995643616 ], [ 3.4374516010284424, 0.5037427544593811 ], [ 3.448352098464966, 0.46266797184944153 ], [ 3.457287073135376, 0.42141908407211304 ], [ 3.4641330242156982, 0.3799927234649658 ], [ 3.468876838684082, 0.33839157223701477 ], [ 3.471616506576538, 0.2966245114803314 ], [ 3.4724483489990234, 0.2547004222869873 ], [ 3.4714694023132324, 0.21262840926647186 ], [ 3.4687745571136475, 0.17041781544685364 ], [ 3.4644577503204346, 0.1280783712863922 ], [ 3.4586100578308105, 0.0856202244758606 ], [ 3.4513206481933594, 0.043054141104221344 ], [ 3.442674398422241, 0.0003915314737241715 ], [ 3.432753562927246, -0.04235544055700302 ], [ 3.421635389328003, -0.08517380803823471 ], [ 3.409393072128296, -0.12804976105690002 ], [ 3.3960955142974854, -0.17096871137619019 ], [ 3.3818066120147705, -0.21391519904136658 ], [ 3.366586685180664, -0.25687310099601746 ], [ 3.3504908084869385, -0.29982560873031616 ], [ 3.333570718765259, -0.34275543689727783 ], [ 3.315875291824341, -0.3856448531150818 ], [ 3.297449827194214, -0.42847591638565063 ], [ 3.278337240219116, -0.47123050689697266 ], [ 3.2585792541503906, -0.5138905048370361 ], [ 3.238215923309326, -0.5564379692077637 ], [ 3.217292308807373, -0.5988707542419434 ], [ 3.1958582401275635, -0.6412028074264526 ], [ 3.1739635467529297, -0.6834480166435242 ], [ 3.1516590118408203, -0.7256200909614563 ], [ 3.1289961338043213, -0.7677323818206787 ], [ 3.1060280799865723, -0.8097975850105286 ], [ 3.082807779312134, -0.8518276214599609 ], [ 3.0593905448913574, -0.8938331604003906 ], [ 3.0358314514160156, -0.9358235001564026 ], [ 3.0062689781188965, -0.9883013367652893 ], [ 2.9885144233703613, -1.0197867155075073 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/05_root_path/meta.json ================================================ { "text": "Initially standing still and calm, the person then starts jogging in a counterclockwise arc.", "duration": 6.033333333333333, "num_samples": 1, "seed": 62, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/06_root_waypoints/constraints.json ================================================ [ { "type": "root2d", "frame_indices": [ 0, 87, 169, 240 ], "smooth_root_2d": [ [ 0.037946805357933044, -0.036908961832523346 ], [ 2.2506563663482666, 0.06945009529590607 ], [ 2.23332142829895, -2.0749685764312744 ], [ 4.0815324783325195, -2.273184061050415 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/06_root_waypoints/meta.json 
================================================ { "text": "A person is walking while carrying a small object in their left hand", "duration": 8.033333333333333, "num_samples": 1, "seed": 61, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/07_text_terrain/meta.json ================================================ { "text": "A person begins walking up the stairs", "duration": 3.5, "num_samples": 1, "seed": 44, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/08_text_object/meta.json ================================================ { "text": "A person picks up an object from low on their left side and places it up high", "duration": 5.033333333333333, "num_samples": 1, "seed": 47, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/01_single_text_prompt/meta.json ================================================ { "text": "A person runs forward and then leaps over an obstacle in front of them.", "duration": 5.0, "num_samples": 1, "seed": 42, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/02_multi_text_prompt/meta.json ================================================ { "texts": [ "A person is walking forward casually.", "A person turns to the right and starts sneakily moving forward" ], "durations": [ 3.533333333333333, 4.033333333333333 ], "num_samples": 1, "seed": 42, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/03_full_body_keyframes/constraints.json ================================================ [ { "type": "fullbody", "frame_indices": [ 79, 134 ], "local_joints_rot": [ [ [ 0.2765098512172699, 0.3728594183921814, -0.3292054831981659 ], [ 0.35604047775268555, 0.018222831189632416, -0.054862238466739655 ], [ 0.12065527588129044, -0.027457308024168015, -0.06907646358013153 ], [ 0.6048485636711121, -0.11472737789154053, -0.19573566317558289 ], [ -0.12398597598075867, 0.03840772435069084, 0.18822282552719116 ], [ -0.06553511321544647, 0.13032270967960358, 0.04257704317569733 ], [ -0.24969959259033203, 0.06990747153759003, 0.13426002860069275 ], [ -0.002762501360848546, 0.0010064352536574006, -0.0012083332985639572 ], [ -0.18770116567611694, -0.06528781354427338, 0.006136383395642042 ], [ -0.18933561444282532, 0.06753389537334442, -0.00862747710198164 ], [ 0.1765439361333847, -0.5079103708267212, 0.11742556095123291 ], [ -0.6833809614181519, -0.36341744661331177, -0.09875624626874924 ], [ -0.004083660896867514, -0.2955799102783203, 0.007416445296257734 ], [ -0.46948903799057007, 0.0019703502766788006, 0.2218078076839447 ], [ 0.15589098632335663, 0.29247695207595825, -0.2839103043079376 ], [ -0.006183772347867489, 0.039787642657756805, -1.0509610176086426 ], [ 0.28110796213150024, -0.01673225313425064, 0.05465283617377281 ], [ 0.4582408368587494, 0.6058111786842346, 1.040449619293213 ], [ -0.016165010631084442, 0.7843144536018372, 0.007565980777144432 ], [ 
-0.21160456538200378, 0.009858175180852413, 0.022257711738348007 ], [ 0.08559019863605499, -0.26941442489624023, 0.28404051065444946 ], [ -0.0722564086318016, -0.055347055196762085, 0.8767912983894348 ], [ -0.9036330580711365, -0.19308030605316162, 0.6912829875946045 ], [ 1.7018375396728516, -0.052370231598615646, 0.0016176343197003007 ], [ -0.6713079810142517, -0.22423480451107025, -0.17199599742889404 ], [ -0.2397085577249527, -0.04111046716570854, 0.02976534143090248 ], [ -1.4084941148757935, -0.42399686574935913, 0.23780424892902374 ], [ 1.488803744316101, -0.006882219575345516, 0.005796314682811499 ], [ -0.34890878200531006, 0.25402817130088806, -0.10165958851575851 ], [ -0.017090337350964546, 0.013983047567307949, -0.02469288557767868 ] ], [ [ -0.10219376534223557, 0.15241079032421112, -0.1140606626868248 ], [ -0.07097288966178894, -0.023205779492855072, 0.014893154613673687 ], [ -0.11436910182237625, -0.07182353734970093, -0.024793410673737526 ], [ 0.32571300864219666, -0.11312247067689896, -0.017911700531840324 ], [ 0.036515623331069946, -0.0007576555362902582, 0.14029929041862488 ], [ -0.06553909182548523, 0.07225329428911209, 0.0065536051988601685 ], [ -0.09946814924478531, 0.02283940091729164, 0.060293473303318024 ], [ -0.0007363191107288003, 0.0019088855478912592, 0.00034123589284718037 ], [ -0.18651022017002106, -0.06423485279083252, 0.0069741918705403805 ], [ -0.18586836755275726, 0.06800899654626846, -0.0060585117898881435 ], [ 0.23363706469535828, -0.20687633752822876, -0.07240967452526093 ], [ -0.3135974407196045, -0.2623864710330963, -1.0657873153686523 ], [ -0.012310811318457127, -1.6650079488754272, -0.010509567335247993 ], [ -0.8171713352203369, -0.2551392912864685, 0.08705981075763702 ], [ 0.13723036646842957, 0.2864063084125519, -0.2900709807872772 ], [ -0.005930017679929733, 0.05293968319892883, -1.0459250211715698 ], [ 0.24218180775642395, 0.02018338069319725, 0.1226770281791687 ], [ 0.3315959572792053, 0.3782292902469635, 1.2296319007873535 ], [ -0.0014527677558362484, 0.3045952022075653, -0.0014049106976017356 ], [ -0.20010970532894135, -0.07485076785087585, 0.0041703470051288605 ], [ 0.08470325917005539, -0.3079097270965576, 0.29375413060188293 ], [ -0.09725581854581833, -0.055068179965019226, 0.8742175698280334 ], [ 0.4040503203868866, -0.016711091622710228, 0.21672509610652924 ], [ 0.5082376599311829, -0.013459251262247562, 0.004872385878115892 ], [ 0.1745426058769226, -0.24501416087150574, -0.003703102469444275 ], [ -0.33402949571609497, -0.035541169345378876, 0.032360970973968506 ], [ -0.37681734561920166, 0.02067263424396515, 0.10783999413251877 ], [ 0.4257254898548126, 0.0016118268249556422, 0.0033562832977622747 ], [ 0.04139057174324989, 0.032555095851421356, 0.04008425772190094 ], [ -0.03090120106935501, 0.01570875011384487, -0.024774780496954918 ] ] ], "root_positions": [ [ -0.18697306513786316, 0.7126776576042175, 1.1559109687805176 ], [ -0.014062155969440937, 0.9611971974372864, 2.898127555847168 ] ], "smooth_root_2d": [ [ -0.18697306513786316, 1.1559109687805176 ], [ -0.014062155969440937, 2.898127555847168 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/03_full_body_keyframes/meta.json ================================================ { "text": "A person walks forward and picks something up from the ground", "duration": 5.0, "num_samples": 1, "seed": 43, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } 
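[Note on the constraint format, not part of the extracted repository: the constraints.json files above all share one layout — a JSON list of entries, each with a "type" ("root2d", "fullbody", or an end-effector name such as "left-foot"), the "frame_indices" being constrained, and per-frame arrays: "local_joints_rot" (one 3-component rotation per joint per frame), "root_positions" (one XYZ root position per frame), and "smooth_root_2d" (one 2D ground-plane point per frame, matching the first and third components of the paired "root_positions" rows). "root2d" entries carry only "smooth_root_2d". A minimal sketch of loading one of these files and checking that shape invariant, using only the Python standard library; summarize_constraints and shape are illustrative helpers, not part of the kimodo API:

    import json
    from pathlib import Path

    def shape(x):
        """Nested-list shape, e.g. [2, 30, 3] for two frames of 30 joint rotations."""
        dims = []
        while isinstance(x, list) and x:
            dims.append(len(x))
            x = x[0]
        return dims

    def summarize_constraints(path: Path) -> None:
        """Print each constraint entry's type, constrained frames, and array shapes."""
        for entry in json.loads(path.read_text()):
            n_frames = len(entry["frame_indices"])
            print(f"type={entry['type']!r}  frame_indices={entry['frame_indices']}")
            # Every per-frame array should have one leading row per constrained frame.
            for key in ("local_joints_rot", "root_positions", "smooth_root_2d"):
                if key in entry:  # "root2d" entries carry only "smooth_root_2d"
                    dims = shape(entry[key])
                    assert dims[0] == n_frames, f"{key}: {dims[0]} rows != {n_frames} frames"
                    print(f"  {key}: shape {dims}")

    summarize_constraints(Path(
        "kimodo/assets/demo/examples/kimodo-soma-rp/03_full_body_keyframes/constraints.json"
    ))

Run against the full-body keyframe example above, this would report two constrained frames (79 and 134) with per-frame shapes [2, 30, 3], [2, 3], and [2, 2]; for root-path examples such as 05_root_path, the same check confirms one 2D waypoint per listed frame index.]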
================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/04_ee_constraint/constraints.json ================================================ [ { "type": "right-foot", "frame_indices": [ 28, 94 ], "local_joints_rot": [ [ [ 0.14788010716438293, -0.010833931155502796, -0.01388303842395544 ], [ -0.03901153802871704, 0.0003969503741245717, -0.00016447225061710924 ], [ -0.09507032483816147, 0.008639314211905003, -0.0073561337776482105 ], [ 0.21237806975841522, -0.02139095962047577, -0.01700877584517002 ], [ -0.20991119742393494, 0.06551700085401535, -0.05272415280342102 ], [ -0.06337061524391174, 0.05204080045223236, 0.014292852953076363 ], [ 0.07047945261001587, 0.08330309391021729, -0.002013514516875148 ], [ -0.0019600456580519676, -0.0013381227618083358, -2.7628393581835553e-06 ], [ -0.18709787726402283, -0.06659803539514542, 0.0078862514346838 ], [ -0.18698126077651978, 0.06395528465509415, -0.008215037174522877 ], [ 0.08230585604906082, -0.38376951217651367, 0.05542140454053879 ], [ -0.7260366082191467, -0.24878422915935516, -0.35609468817710876 ], [ 0.004249485209584236, -0.4476320147514343, -0.018469776958227158 ], [ -0.9212101697921753, -0.1470143049955368, 0.5044775605201721 ], [ 0.14870156347751617, 0.2985619604587555, -0.29298385977745056 ], [ 0.001955621177330613, 0.055549487471580505, -1.0630463361740112 ], [ 0.11859050393104553, 0.46535199880599976, -0.030845582485198975 ], [ -0.7298654317855835, 0.5346517562866211, 0.2791443467140198 ], [ 0.008972911164164543, 0.48752307891845703, 0.01847967691719532 ], [ -0.5805565118789673, -0.08708631247282028, -0.15088550746440887 ], [ 0.08582834899425507, -0.2886488735675812, 0.2854447066783905 ], [ -0.0898093581199646, -0.05874425172805786, 0.8657776117324829 ], [ -0.3135877549648285, 0.07464626431465149, 0.0517989918589592 ], [ 0.29447537660598755, -0.003720453940331936, 0.0011728419922292233 ], [ -0.12890003621578217, 0.0839272066950798, -0.090343177318573 ], [ 0.008360159583389759, -0.03457032889127731, 0.02827553078532219 ], [ -0.3120643198490143, -0.01133657619357109, -0.03218594938516617 ], [ 0.2538771331310272, 0.0018040596041828394, 0.0009352069464512169 ], [ -0.0887608677148819, -0.03465384244918823, 0.07154331356287003 ], [ 0.01681467890739441, 0.01778421923518181, -0.025033073499798775 ] ], [ [ 0.21243979036808014, 1.0922467708587646, -0.05739659443497658 ], [ -0.04288899898529053, 0.019888481125235558, -0.014078406617045403 ], [ -0.09594971686601639, 0.10335114598274231, -0.007776615675538778 ], [ 0.2422163188457489, 0.08445896953344345, -0.05605608597397804 ], [ -0.14986605942249298, 0.10279522091150284, -0.19410337507724762 ], [ -0.07278254628181458, 0.00021229058620519936, -0.0064666238613426685 ], [ -0.18101167678833008, -0.047196485102176666, 0.09371022135019302 ], [ -0.0013136633206158876, -0.0020103836432099342, -0.0002618256548885256 ], [ -0.1867513209581375, -0.0681525468826294, 0.0023792991414666176 ], [ -0.18714284896850586, 0.06443598866462708, -0.003183535533025861 ], [ 0.1040755957365036, -0.1164601668715477, -0.08953910320997238 ], [ -0.7818892598152161, -0.40082883834838867, -0.40901198983192444 ], [ 0.0014971806667745113, -0.7006690502166748, -0.003588718129321933 ], [ -0.7653300762176514, -0.030549153685569763, 0.5779297947883606 ], [ 0.1444747895002365, 0.30648332834243774, -0.2944350242614746 ], [ 0.00627485616132617, 0.05844533443450928, -1.0504485368728638 ], [ 0.16790169477462769, 0.6803913116455078, -0.0802350640296936 ], [ -0.7650246620178223, 0.2571314871311188, 
0.044474273920059204 ], [ 0.00177879654802382, 0.32478848099708557, 0.024663111194968224 ], [ -1.1130585670471191, 0.06198093295097351, -0.1499929279088974 ], [ 0.09419120848178864, -0.28672322630882263, 0.2861841320991516 ], [ -0.08110660314559937, -0.06315471976995468, 0.8641197085380554 ], [ -0.4702282249927521, -0.2976788580417633, -0.08966172486543655 ], [ 0.2188275307416916, -0.010813144035637379, -0.0024994502309709787 ], [ 0.12644176185131073, -0.4933742582798004, -0.23269610106945038 ], [ -0.05216464772820473, -0.03182952478528023, 0.026469329372048378 ], [ -0.21055173873901367, -0.5854666233062744, -0.08316371589899063 ], [ 0.2703852653503418, -0.0070351893082261086, 0.00034556735772639513 ], [ -0.20080512762069702, -0.5529999136924744, 0.08794122189283371 ], [ -0.020619722083210945, 0.01961597241461277, -0.02498687617480755 ] ] ], "root_positions": [ [ 0.006224155426025391, 1.0099574327468872, 0.0004121592501178384 ], [ 0.025673866271972656, 1.0039517879486084, 0.0002174415858462453 ] ], "smooth_root_2d": [ [ 0.006224155426025391, 0.0004121592501178384 ], [ 0.025673866271972656, 0.0002174415858462453 ] ] }, { "type": "left-foot", "frame_indices": [ 28, 94 ], "local_joints_rot": [ [ [ 0.14788010716438293, -0.010833931155502796, -0.01388303842395544 ], [ -0.03901153802871704, 0.0003969503741245717, -0.00016447225061710924 ], [ -0.09507032483816147, 0.008639314211905003, -0.0073561337776482105 ], [ 0.21237806975841522, -0.02139095962047577, -0.01700877584517002 ], [ -0.20991119742393494, 0.06551700085401535, -0.05272415280342102 ], [ -0.06337061524391174, 0.05204080045223236, 0.014292852953076363 ], [ 0.07047945261001587, 0.08330309391021729, -0.002013514516875148 ], [ -0.0019600456580519676, -0.0013381227618083358, -2.7628393581835553e-06 ], [ -0.18709787726402283, -0.06659803539514542, 0.0078862514346838 ], [ -0.18698126077651978, 0.06395528465509415, -0.008215037174522877 ], [ 0.08230585604906082, -0.38376951217651367, 0.05542140454053879 ], [ -0.7260366082191467, -0.24878422915935516, -0.35609468817710876 ], [ 0.004249485209584236, -0.4476320147514343, -0.018469776958227158 ], [ -0.9212101697921753, -0.1470143049955368, 0.5044775605201721 ], [ 0.14870156347751617, 0.2985619604587555, -0.29298385977745056 ], [ 0.001955621177330613, 0.055549487471580505, -1.0630463361740112 ], [ 0.11859050393104553, 0.46535199880599976, -0.030845582485198975 ], [ -0.7298654317855835, 0.5346517562866211, 0.2791443467140198 ], [ 0.008972911164164543, 0.48752307891845703, 0.01847967691719532 ], [ -0.5805565118789673, -0.08708631247282028, -0.15088550746440887 ], [ 0.08582834899425507, -0.2886488735675812, 0.2854447066783905 ], [ -0.0898093581199646, -0.05874425172805786, 0.8657776117324829 ], [ -0.3135877549648285, 0.07464626431465149, 0.0517989918589592 ], [ 0.29447537660598755, -0.003720453940331936, 0.0011728419922292233 ], [ -0.12890003621578217, 0.0839272066950798, -0.090343177318573 ], [ 0.008360159583389759, -0.03457032889127731, 0.02827553078532219 ], [ -0.3120643198490143, -0.01133657619357109, -0.03218594938516617 ], [ 0.2538771331310272, 0.0018040596041828394, 0.0009352069464512169 ], [ -0.0887608677148819, -0.03465384244918823, 0.07154331356287003 ], [ 0.01681467890739441, 0.01778421923518181, -0.025033073499798775 ] ], [ [ 0.21243979036808014, 1.0922467708587646, -0.05739659443497658 ], [ -0.04288899898529053, 0.019888481125235558, -0.014078406617045403 ], [ -0.09594971686601639, 0.10335114598274231, -0.007776615675538778 ], [ 0.2422163188457489, 0.08445896953344345, 
-0.05605608597397804 ], [ -0.14986605942249298, 0.10279522091150284, -0.19410337507724762 ], [ -0.07278254628181458, 0.00021229058620519936, -0.0064666238613426685 ], [ -0.18101167678833008, -0.047196485102176666, 0.09371022135019302 ], [ -0.0013136633206158876, -0.0020103836432099342, -0.0002618256548885256 ], [ -0.1867513209581375, -0.0681525468826294, 0.0023792991414666176 ], [ -0.18714284896850586, 0.06443598866462708, -0.003183535533025861 ], [ 0.1040755957365036, -0.1164601668715477, -0.08953910320997238 ], [ -0.7818892598152161, -0.40082883834838867, -0.40901198983192444 ], [ 0.0014971806667745113, -0.7006690502166748, -0.003588718129321933 ], [ -0.7653300762176514, -0.030549153685569763, 0.5779297947883606 ], [ 0.1444747895002365, 0.30648332834243774, -0.2944350242614746 ], [ 0.00627485616132617, 0.05844533443450928, -1.0504485368728638 ], [ 0.16790169477462769, 0.6803913116455078, -0.0802350640296936 ], [ -0.7650246620178223, 0.2571314871311188, 0.044474273920059204 ], [ 0.00177879654802382, 0.32478848099708557, 0.024663111194968224 ], [ -1.1130585670471191, 0.06198093295097351, -0.1499929279088974 ], [ 0.09419120848178864, -0.28672322630882263, 0.2861841320991516 ], [ -0.08110660314559937, -0.06315471976995468, 0.8641197085380554 ], [ -0.4702282249927521, -0.2976788580417633, -0.08966172486543655 ], [ 0.2188275307416916, -0.010813144035637379, -0.0024994502309709787 ], [ 0.12644176185131073, -0.4933742582798004, -0.23269610106945038 ], [ -0.05216464772820473, -0.03182952478528023, 0.026469329372048378 ], [ -0.21055173873901367, -0.5854666233062744, -0.08316371589899063 ], [ 0.2703852653503418, -0.0070351893082261086, 0.00034556735772639513 ], [ -0.20080512762069702, -0.5529999136924744, 0.08794122189283371 ], [ -0.020619722083210945, 0.01961597241461277, -0.02498687617480755 ] ] ], "root_positions": [ [ 0.006224155426025391, 1.0099574327468872, 0.0004121592501178384 ], [ 0.025673866271972656, 1.0039517879486084, 0.0002174415858462453 ] ], "smooth_root_2d": [ [ 0.006224155426025391, 0.0004121592501178384 ], [ 0.025673866271972656, 0.0002174415858462453 ] ] }, { "type": "left-hand", "frame_indices": [ 28, 94 ], "local_joints_rot": [ [ [ 0.14788010716438293, -0.010833931155502796, -0.01388303842395544 ], [ -0.03901153802871704, 0.0003969503741245717, -0.00016447225061710924 ], [ -0.09507032483816147, 0.008639314211905003, -0.0073561337776482105 ], [ 0.21237806975841522, -0.02139095962047577, -0.01700877584517002 ], [ -0.20991119742393494, 0.06551700085401535, -0.05272415280342102 ], [ -0.06337061524391174, 0.05204080045223236, 0.014292852953076363 ], [ 0.07047945261001587, 0.08330309391021729, -0.002013514516875148 ], [ -0.0019600456580519676, -0.0013381227618083358, -2.7628393581835553e-06 ], [ -0.18709787726402283, -0.06659803539514542, 0.0078862514346838 ], [ -0.18698126077651978, 0.06395528465509415, -0.008215037174522877 ], [ 0.08230585604906082, -0.38376951217651367, 0.05542140454053879 ], [ -0.7260366082191467, -0.24878422915935516, -0.35609468817710876 ], [ 0.004249485209584236, -0.4476320147514343, -0.018469776958227158 ], [ -0.9212101697921753, -0.1470143049955368, 0.5044775605201721 ], [ 0.14870156347751617, 0.2985619604587555, -0.29298385977745056 ], [ 0.001955621177330613, 0.055549487471580505, -1.0630463361740112 ], [ 0.11859050393104553, 0.46535199880599976, -0.030845582485198975 ], [ -0.7298654317855835, 0.5346517562866211, 0.2791443467140198 ], [ 0.008972911164164543, 0.48752307891845703, 0.01847967691719532 ], [ -0.5805565118789673, -0.08708631247282028, 
-0.15088550746440887 ], [ 0.08582834899425507, -0.2886488735675812, 0.2854447066783905 ], [ -0.0898093581199646, -0.05874425172805786, 0.8657776117324829 ], [ -0.3135877549648285, 0.07464626431465149, 0.0517989918589592 ], [ 0.29447537660598755, -0.003720453940331936, 0.0011728419922292233 ], [ -0.12890003621578217, 0.0839272066950798, -0.090343177318573 ], [ 0.008360159583389759, -0.03457032889127731, 0.02827553078532219 ], [ -0.3120643198490143, -0.01133657619357109, -0.03218594938516617 ], [ 0.2538771331310272, 0.0018040596041828394, 0.0009352069464512169 ], [ -0.0887608677148819, -0.03465384244918823, 0.07154331356287003 ], [ 0.01681467890739441, 0.01778421923518181, -0.025033073499798775 ] ], [ [ 0.21243979036808014, 1.0922467708587646, -0.05739659443497658 ], [ -0.04288899898529053, 0.019888481125235558, -0.014078406617045403 ], [ -0.09594971686601639, 0.10335114598274231, -0.007776615675538778 ], [ 0.2422163188457489, 0.08445896953344345, -0.05605608597397804 ], [ -0.14986605942249298, 0.10279522091150284, -0.19410337507724762 ], [ -0.07278254628181458, 0.00021229058620519936, -0.0064666238613426685 ], [ -0.18101167678833008, -0.047196485102176666, 0.09371022135019302 ], [ -0.0013136633206158876, -0.0020103836432099342, -0.0002618256548885256 ], [ -0.1867513209581375, -0.0681525468826294, 0.0023792991414666176 ], [ -0.18714284896850586, 0.06443598866462708, -0.003183535533025861 ], [ 0.1040755957365036, -0.1164601668715477, -0.08953910320997238 ], [ -0.7818892598152161, -0.40082883834838867, -0.40901198983192444 ], [ 0.0014971806667745113, -0.7006690502166748, -0.003588718129321933 ], [ -0.7653300762176514, -0.030549153685569763, 0.5779297947883606 ], [ 0.1444747895002365, 0.30648332834243774, -0.2944350242614746 ], [ 0.00627485616132617, 0.05844533443450928, -1.0504485368728638 ], [ 0.16790169477462769, 0.6803913116455078, -0.0802350640296936 ], [ -0.7650246620178223, 0.2571314871311188, 0.044474273920059204 ], [ 0.00177879654802382, 0.32478848099708557, 0.024663111194968224 ], [ -1.1130585670471191, 0.06198093295097351, -0.1499929279088974 ], [ 0.09419120848178864, -0.28672322630882263, 0.2861841320991516 ], [ -0.08110660314559937, -0.06315471976995468, 0.8641197085380554 ], [ -0.4702282249927521, -0.2976788580417633, -0.08966172486543655 ], [ 0.2188275307416916, -0.010813144035637379, -0.0024994502309709787 ], [ 0.12644176185131073, -0.4933742582798004, -0.23269610106945038 ], [ -0.05216464772820473, -0.03182952478528023, 0.026469329372048378 ], [ -0.21055173873901367, -0.5854666233062744, -0.08316371589899063 ], [ 0.2703852653503418, -0.0070351893082261086, 0.00034556735772639513 ], [ -0.20080512762069702, -0.5529999136924744, 0.08794122189283371 ], [ -0.020619722083210945, 0.01961597241461277, -0.02498687617480755 ] ] ], "root_positions": [ [ 0.006224155426025391, 1.0099574327468872, 0.0004121592501178384 ], [ 0.025673866271972656, 1.0039517879486084, 0.0002174415858462453 ] ], "smooth_root_2d": [ [ 0.006224155426025391, 0.0004121592501178384 ], [ 0.025673866271972656, 0.0002174415858462453 ] ] }, { "type": "right-hand", "frame_indices": [ 28, 94 ], "local_joints_rot": [ [ [ 0.14788010716438293, -0.010833931155502796, -0.01388303842395544 ], [ -0.03901153802871704, 0.0003969503741245717, -0.00016447225061710924 ], [ -0.09507032483816147, 0.008639314211905003, -0.0073561337776482105 ], [ 0.21237806975841522, -0.02139095962047577, -0.01700877584517002 ], [ -0.20991119742393494, 0.06551700085401535, -0.05272415280342102 ], [ -0.06337061524391174, 0.05204080045223236, 
0.014292852953076363 ], [ 0.07047945261001587, 0.08330309391021729, -0.002013514516875148 ], [ -0.0019600456580519676, -0.0013381227618083358, -2.7628393581835553e-06 ], [ -0.18709787726402283, -0.06659803539514542, 0.0078862514346838 ], [ -0.18698126077651978, 0.06395528465509415, -0.008215037174522877 ], [ 0.08230585604906082, -0.38376951217651367, 0.05542140454053879 ], [ -0.7260366082191467, -0.24878422915935516, -0.35609468817710876 ], [ 0.004249485209584236, -0.4476320147514343, -0.018469776958227158 ], [ -0.9212101697921753, -0.1470143049955368, 0.5044775605201721 ], [ 0.14870156347751617, 0.2985619604587555, -0.29298385977745056 ], [ 0.001955621177330613, 0.055549487471580505, -1.0630463361740112 ], [ 0.11859050393104553, 0.46535199880599976, -0.030845582485198975 ], [ -0.7298654317855835, 0.5346517562866211, 0.2791443467140198 ], [ 0.008972911164164543, 0.48752307891845703, 0.01847967691719532 ], [ -0.5805565118789673, -0.08708631247282028, -0.15088550746440887 ], [ 0.08582834899425507, -0.2886488735675812, 0.2854447066783905 ], [ -0.0898093581199646, -0.05874425172805786, 0.8657776117324829 ], [ -0.3135877549648285, 0.07464626431465149, 0.0517989918589592 ], [ 0.29447537660598755, -0.003720453940331936, 0.0011728419922292233 ], [ -0.12890003621578217, 0.0839272066950798, -0.090343177318573 ], [ 0.008360159583389759, -0.03457032889127731, 0.02827553078532219 ], [ -0.3120643198490143, -0.01133657619357109, -0.03218594938516617 ], [ 0.2538771331310272, 0.0018040596041828394, 0.0009352069464512169 ], [ -0.0887608677148819, -0.03465384244918823, 0.07154331356287003 ], [ 0.01681467890739441, 0.01778421923518181, -0.025033073499798775 ] ], [ [ 0.21243979036808014, 1.0922467708587646, -0.05739659443497658 ], [ -0.04288899898529053, 0.019888481125235558, -0.014078406617045403 ], [ -0.09594971686601639, 0.10335114598274231, -0.007776615675538778 ], [ 0.2422163188457489, 0.08445896953344345, -0.05605608597397804 ], [ -0.14986605942249298, 0.10279522091150284, -0.19410337507724762 ], [ -0.07278254628181458, 0.00021229058620519936, -0.0064666238613426685 ], [ -0.18101167678833008, -0.047196485102176666, 0.09371022135019302 ], [ -0.0013136633206158876, -0.0020103836432099342, -0.0002618256548885256 ], [ -0.1867513209581375, -0.0681525468826294, 0.0023792991414666176 ], [ -0.18714284896850586, 0.06443598866462708, -0.003183535533025861 ], [ 0.1040755957365036, -0.1164601668715477, -0.08953910320997238 ], [ -0.7818892598152161, -0.40082883834838867, -0.40901198983192444 ], [ 0.0014971806667745113, -0.7006690502166748, -0.003588718129321933 ], [ -0.7653300762176514, -0.030549153685569763, 0.5779297947883606 ], [ 0.1444747895002365, 0.30648332834243774, -0.2944350242614746 ], [ 0.00627485616132617, 0.05844533443450928, -1.0504485368728638 ], [ 0.16790169477462769, 0.6803913116455078, -0.0802350640296936 ], [ -0.7650246620178223, 0.2571314871311188, 0.044474273920059204 ], [ 0.00177879654802382, 0.32478848099708557, 0.024663111194968224 ], [ -1.1130585670471191, 0.06198093295097351, -0.1499929279088974 ], [ 0.09419120848178864, -0.28672322630882263, 0.2861841320991516 ], [ -0.08110660314559937, -0.06315471976995468, 0.8641197085380554 ], [ -0.4702282249927521, -0.2976788580417633, -0.08966172486543655 ], [ 0.2188275307416916, -0.010813144035637379, -0.0024994502309709787 ], [ 0.12644176185131073, -0.4933742582798004, -0.23269610106945038 ], [ -0.05216464772820473, -0.03182952478528023, 0.026469329372048378 ], [ -0.21055173873901367, -0.5854666233062744, -0.08316371589899063 ], [ 0.2703852653503418, 
-0.0070351893082261086, 0.00034556735772639513 ], [ -0.20080512762069702, -0.5529999136924744, 0.08794122189283371 ], [ -0.020619722083210945, 0.01961597241461277, -0.02498687617480755 ] ] ], "root_positions": [ [ 0.006224155426025391, 1.0099574327468872, 0.0004121592501178384 ], [ 0.025673866271972656, 1.0039517879486084, 0.0002174415858462453 ] ], "smooth_root_2d": [ [ 0.006224155426025391, 0.0004121592501178384 ], [ 0.025673866271972656, 0.0002174415858462453 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/04_ee_constraint/meta.json ================================================ { "text": "A person picks up an object in front of them with two hands and places it to the left side", "duration": 5.033333333333333, "num_samples": 1, "seed": 48, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/05_root_path/constraints.json ================================================ [ { "type": "root2d", "frame_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299 ], "smooth_root_2d": [ [ 0.0720488652586937, 0.005473949480801821 ], [ 0.08682604879140854, 0.03799768537282944 ], [ 0.10160323977470398, 0.07052142173051834 ], [ 0.11638043075799942, 0.10304517298936844 ], [ 0.13115762174129486, 0.13556894659996033 ], [ 0.1459348350763321, 0.1680927276611328 ], [ 0.16071203351020813, 0.20061656832695007 ], [ 0.17548926174640656, 0.23314043879508972 ], [ 0.19026650488376617, 0.26566436886787415 ], [ 0.2050437480211258, 0.29818838834762573 ], [ 0.2198210209608078, 0.3307124972343445 ], [ 0.234598308801651, 0.3632366955280304 ], [ 0.2493756115436554, 0.39576101303100586 ], [ 0.2641529440879822, 0.42828547954559326 ], [ 0.27893027663230896, 0.4608100950717926 ], [ 0.29370763897895813, 0.4933348596096039 ], [ 0.3084850013256073, 0.5258598327636719 ], [ 0.32326239347457886, 0.5583849549293518 ], [ 0.3380397856235504, 0.5909103155136108 ], [ 0.352817177772522, 0.623435914516449 ], [ 0.36759456992149353, 0.6559617519378662 ], [ 0.3823719322681427, 0.6884878277778625 ], [ 0.39714932441711426, 
0.721014142036438 ], [ 0.41192665696144104, 0.7535408139228821 ], [ 0.4267039895057678, 0.7860677242279053 ], [ 0.4414812922477722, 0.8185949325561523 ], [ 0.4562585949897766, 0.8511224389076233 ], [ 0.47103583812713623, 0.8836503028869629 ], [ 0.48581308126449585, 0.9161785244941711 ], [ 0.5005902647972107, 0.948707103729248 ], [ 0.5153675079345703, 0.9812359809875488 ], [ 0.5301446914672852, 1.0137652158737183 ], [ 0.5449219346046448, 1.046294927597046 ], [ 0.5596991777420044, 1.0788248777389526 ], [ 0.5744765400886536, 1.1113553047180176 ], [ 0.5892539024353027, 1.1438862085342407 ], [ 0.6040313243865967, 1.1764174699783325 ], [ 0.6188088655471802, 1.208949089050293 ], [ 0.6335865259170532, 1.2414813041687012 ], [ 0.648364245891571, 1.274013876914978 ], [ 0.6631421446800232, 1.3065470457077026 ], [ 0.6779201030731201, 1.3390806913375854 ], [ 0.6926981806755066, 1.371614933013916 ], [ 0.7074640989303589, 1.4041519165039062 ], [ 0.7221670746803284, 1.4367012977600098 ], [ 0.7367299199104309, 1.4692773818969727 ], [ 0.7510751485824585, 1.5018945932388306 ], [ 0.7651242613792419, 1.5345673561096191 ], [ 0.7787973880767822, 1.5673108100891113 ], [ 0.7920125126838684, 1.6001399755477905 ], [ 0.8046852350234985, 1.6330705881118774 ], [ 0.8167278170585632, 1.66611909866333 ], [ 0.8280492424964905, 1.6993021965026855 ], [ 0.8385547399520874, 1.7326377630233765 ], [ 0.8481456637382507, 1.766144037246704 ], [ 0.856719434261322, 1.7998400926589966 ], [ 0.8641700744628906, 1.8337457180023193 ], [ 0.8703880906105042, 1.8678812980651855 ], [ 0.875261127948761, 1.9022676944732666 ], [ 0.8786745071411133, 1.9369266033172607 ], [ 0.8805115222930908, 1.971879482269287 ], [ 0.8806543946266174, 2.0071487426757812 ], [ 0.8789843320846558, 2.0427565574645996 ], [ 0.8753821849822998, 2.0787250995635986 ], [ 0.869838297367096, 2.1150567531585693 ], [ 0.8624524474143982, 2.1517333984375 ], [ 0.8533244729042053, 2.1887366771698 ], [ 0.8425538539886475, 2.226048469543457 ], [ 0.8302397131919861, 2.263650894165039 ], [ 0.816480278968811, 2.301525831222534 ], [ 0.8013728260993958, 2.3396553993225098 ], [ 0.7850133180618286, 2.3780221939086914 ], [ 0.7674961686134338, 2.4166083335876465 ], [ 0.7489144802093506, 2.4553961753845215 ], [ 0.7293595671653748, 2.494368553161621 ], [ 0.7089214324951172, 2.533508062362671 ], [ 0.6876888871192932, 2.5727970600128174 ], [ 0.665749728679657, 2.6122183799743652 ], [ 0.6431912779808044, 2.651754379272461 ], [ 0.6200692653656006, 2.691394805908203 ], [ 0.5964087247848511, 2.731137275695801 ], [ 0.5722349882125854, 2.770979166030884 ], [ 0.5475742816925049, 2.810917615890503 ], [ 0.5224538445472717, 2.8509483337402344 ], [ 0.49690231680870056, 2.8910679817199707 ], [ 0.47094982862472534, 2.93127179145813 ], [ 0.44462811946868896, 2.971554756164551 ], [ 0.4179706573486328, 3.011911630630493 ], [ 0.3910125195980072, 3.0523364543914795 ], [ 0.3637904226779938, 3.0928235054016113 ], [ 0.336342453956604, 3.133366107940674 ], [ 0.3087080717086792, 3.173957586288452 ], [ 0.2809275984764099, 3.2145910263061523 ], [ 0.25304216146469116, 3.2552595138549805 ], [ 0.2250932902097702, 3.2959556579589844 ], [ 0.19712261855602264, 3.336672067642212 ], [ 0.16917157173156738, 3.3774020671844482 ], [ 0.14128103852272034, 3.418138027191162 ], [ 0.11349108070135117, 3.4588732719421387 ], [ 0.08584070205688477, 3.499600648880005 ], [ 0.05836760997772217, 3.540313243865967 ], [ 0.031108075752854347, 3.5810046195983887 ], [ 0.004096813499927521, 3.6216683387756348 ], [ -0.022633060812950134, 
3.6622982025146484 ], [ -0.049050018191337585, 3.702888250350952 ], [ -0.07512406259775162, 3.7434325218200684 ], [ -0.10082659870386124, 3.7839250564575195 ], [ -0.12613031268119812, 3.8243606090545654 ], [ -0.1510089486837387, 3.8647332191467285 ], [ -0.17543718218803406, 3.9050378799438477 ], [ -0.19939035177230835, 3.9452688694000244 ], [ -0.22284428775310516, 3.9854207038879395 ], [ -0.24577516317367554, 4.025487899780273 ], [ -0.26815930008888245, 4.065464496612549 ], [ -0.28985288739204407, 4.1053338050842285 ], [ -0.3105919361114502, 4.145066261291504 ], [ -0.33011239767074585, 4.184632301330566 ], [ -0.34815022349357605, 4.224003314971924 ], [ -0.3644413650035858, 4.263148784637451 ], [ -0.3787217438220978, 4.302039623260498 ], [ -0.3907274007797241, 4.340645790100098 ], [ -0.4001944959163666, 4.378937721252441 ], [ -0.40685927867889404, 4.416884899139404 ], [ -0.41045811772346497, 4.4544572830200195 ], [ -0.41072750091552734, 4.491624355316162 ], [ -0.40740400552749634, 4.528356552124023 ], [ -0.4004855453968048, 4.564655303955078 ], [ -0.3902314007282257, 4.600553512573242 ], [ -0.37690070271492004, 4.636085033416748 ], [ -0.3607523441314697, 4.67128324508667 ], [ -0.3420449197292328, 4.706181049346924 ], [ -0.32103657722473145, 4.740812301635742 ], [ -0.2979850471019745, 4.775211334228516 ], [ -0.2731475234031677, 4.809412002563477 ], [ -0.24678070843219757, 4.843447685241699 ], [ -0.21914079785346985, 4.877353668212891 ], [ -0.19048355519771576, 4.911164283752441 ], [ -0.16106447577476501, 4.944913864135742 ], [ -0.13102509081363678, 4.978619575500488 ], [ -0.10039319843053818, 5.0122785568237305 ], [ -0.06919693201780319, 5.0458903312683105 ], [ -0.03746507689356804, 5.079452991485596 ], [ -0.005227350629866123, 5.1129655838012695 ], [ 0.027485284954309464, 5.146428108215332 ], [ 0.06064034625887871, 5.179840087890625 ], [ 0.09420355409383774, 5.213201522827148 ], [ 0.12813864648342133, 5.246513843536377 ], [ 0.16240715980529785, 5.279778003692627 ], [ 0.19696833193302155, 5.312995910644531 ], [ 0.2317790538072586, 5.3461689949035645 ], [ 0.266793817281723, 5.379299640655518 ], [ 0.30196475982666016, 5.412391662597656 ], [ 0.3372417688369751, 5.4454474449157715 ], [ 0.37257257103919983, 5.478470325469971 ], [ 0.40790289640426636, 5.511464595794678 ], [ 0.4431767165660858, 5.544434547424316 ], [ 0.478336364030838, 5.577383518218994 ], [ 0.5133227705955505, 5.610316753387451 ], [ 0.5480756759643555, 5.643238544464111 ], [ 0.5825338363647461, 5.676154136657715 ], [ 0.6166353225708008, 5.709067344665527 ], [ 0.6503174901008606, 5.741983413696289 ], [ 0.6835171580314636, 5.774907112121582 ], [ 0.7161709666252136, 5.8078436851501465 ], [ 0.7482153177261353, 5.840796947479248 ], [ 0.7795863747596741, 5.873773097991943 ], [ 0.8102203011512756, 5.906775951385498 ], [ 0.8400532603263855, 5.939810276031494 ], [ 0.8690049648284912, 5.9728803634643555 ], [ 0.8969439268112183, 6.005988121032715 ], [ 0.9237036108970642, 6.039134979248047 ], [ 0.9491175413131714, 6.072321891784668 ], [ 0.9730191230773926, 6.105550289154053 ], [ 0.9952419996261597, 6.138820171356201 ], [ 1.0156195163726807, 6.172133445739746 ], [ 1.0339852571487427, 6.205490589141846 ], [ 1.0501729249954224, 6.238892555236816 ], [ 1.0640157461166382, 6.272340774536133 ], [ 1.075347661972046, 6.305835723876953 ], [ 1.084001898765564, 6.339378356933594 ], [ 1.0898123979568481, 6.372969627380371 ], [ 1.0927863121032715, 6.406609058380127 ], [ 1.093105435371399, 6.440292835235596 ], [ 1.090950846672058, 6.474018096923828 ], [ 
1.0865041017532349, 6.507782459259033 ], [ 1.079946517944336, 6.541581630706787 ], [ 1.0714592933654785, 6.575413227081299 ], [ 1.0612238645553589, 6.609274387359619 ], [ 1.0494211912155151, 6.643161773681641 ], [ 1.036232590675354, 6.677072525024414 ], [ 1.0218391418457031, 6.71100378036499 ], [ 1.006421685218811, 6.7449517250061035 ], [ 0.9901613593101501, 6.778914451599121 ], [ 0.9732388854026794, 6.812887668609619 ], [ 0.9558353424072266, 6.846869468688965 ], [ 0.9380521178245544, 6.880856990814209 ], [ 0.9199115633964539, 6.91485071182251 ], [ 0.9014359712600708, 6.948850154876709 ], [ 0.8826476335525513, 6.98285436630249 ], [ 0.8635689616203308, 7.016862869262695 ], [ 0.8442226052284241, 7.050876140594482 ], [ 0.8246312141418457, 7.084892749786377 ], [ 0.8048177361488342, 7.118912696838379 ], [ 0.7848052978515625, 7.15293550491333 ], [ 0.7646171450614929, 7.186960697174072 ], [ 0.7442769408226013, 7.220987796783447 ], [ 0.7238084673881531, 7.255016326904297 ], [ 0.703235924243927, 7.289045810699463 ], [ 0.682583749294281, 7.323075771331787 ], [ 0.6618766784667969, 7.357105731964111 ], [ 0.6411397457122803, 7.391135215759277 ], [ 0.6203982830047607, 7.425163269042969 ], [ 0.5996780395507812, 7.4591898918151855 ], [ 0.5790049433708191, 7.4932146072387695 ], [ 0.5584054589271545, 7.5272369384765625 ], [ 0.5379061102867126, 7.56125545501709 ], [ 0.5175339579582214, 7.595271110534668 ], [ 0.4973162114620209, 7.629281997680664 ], [ 0.4772806167602539, 7.663288116455078 ], [ 0.457455039024353, 7.697288990020752 ], [ 0.43786779046058655, 7.731284141540527 ], [ 0.41854748129844666, 7.765272617340088 ], [ 0.3995230197906494, 7.799253940582275 ], [ 0.38082367181777954, 7.833227634429932 ], [ 0.3624790608882904, 7.867193222045898 ], [ 0.34451907873153687, 7.901149749755859 ], [ 0.32697397470474243, 7.935096263885498 ], [ 0.3098742961883545, 7.969033241271973 ], [ 0.2932509779930115, 8.002959251403809 ], [ 0.2771351933479309, 8.036873817443848 ], [ 0.2615584135055542, 8.070775985717773 ], [ 0.24655242264270782, 8.10466480255127 ], [ 0.23214924335479736, 8.138541221618652 ], [ 0.21838118135929108, 8.172403335571289 ], [ 0.20528072118759155, 8.206250190734863 ], [ 0.19288058578968048, 8.240081787109375 ], [ 0.18121366202831268, 8.273897171020508 ], [ 0.17031297087669373, 8.307695388793945 ], [ 0.1602116823196411, 8.341476440429688 ], [ 0.15094305574893951, 8.375238418579102 ], [ 0.14254039525985718, 8.408982276916504 ], [ 0.13503706455230713, 8.442705154418945 ], [ 0.12846647202968597, 8.476408958435059 ], [ 0.12282804399728775, 8.510091781616211 ], [ 0.11808725446462631, 8.543754577636719 ], [ 0.11420957744121552, 8.577399253845215 ], [ 0.11116043478250504, 8.6110258102417 ], [ 0.10890527069568634, 8.644634246826172 ], [ 0.10740949213504791, 8.678226470947266 ], [ 0.10663850605487823, 8.711803436279297 ], [ 0.1065577045083046, 8.74536418914795 ], [ 0.10713250190019608, 8.778911590576172 ], [ 0.10832829773426056, 8.812445640563965 ], [ 0.11011053621768951, 8.845966339111328 ], [ 0.112444669008255, 8.879474639892578 ], [ 0.11529617011547089, 8.912972450256348 ], [ 0.11863056570291519, 8.946459770202637 ], [ 0.12241341173648834, 8.979937553405762 ], [ 0.12661030888557434, 9.013405799865723 ], [ 0.1311868578195572, 9.046866416931152 ], [ 0.13610877096652985, 9.080318450927734 ], [ 0.14134173095226288, 9.113764762878418 ], [ 0.14685149490833282, 9.147205352783203 ], [ 0.15260380506515503, 9.18064022064209 ], [ 0.158564493060112, 9.214071273803711 ], [ 0.16469934582710266, 9.24749755859375 ], [ 
0.17097420990467072, 9.280921936035156 ], [ 0.17735493183135986, 9.314343452453613 ], [ 0.1838073432445526, 9.347764015197754 ], [ 0.19029729068279266, 9.381183624267578 ], [ 0.19679751992225647, 9.414603233337402 ], [ 0.20329780876636505, 9.448022842407227 ], [ 0.2097981721162796, 9.481443405151367 ], [ 0.21629860997200012, 9.514863014221191 ], [ 0.22279909253120422, 9.548283576965332 ], [ 0.2292996346950531, 9.581703186035156 ], [ 0.23580022156238556, 9.615123748779297 ], [ 0.2423008531332016, 9.648544311523438 ], [ 0.24880154430866241, 9.681964874267578 ], [ 0.2553022503852844, 9.715385437011719 ], [ 0.2618030309677124, 9.74880599975586 ], [ 0.2683038115501404, 9.7822265625 ], [ 0.27480462193489075, 9.815648078918457 ], [ 0.2813054919242859, 9.849068641662598 ], [ 0.28780636191368103, 9.882490158081055 ], [ 0.29430726170539856, 9.915910720825195 ], [ 0.3008081614971161, 9.949331283569336 ], [ 0.307309091091156, 9.982752799987793 ], [ 0.3138100206851959, 10.01617431640625 ], [ 0.3203109800815582, 10.04959487915039 ], [ 0.32681193947792053, 10.083016395568848 ], [ 0.33331289887428284, 10.116436958312988 ], [ 0.33981388807296753, 10.149858474731445 ], [ 0.34631484746932983, 10.183279991149902 ], [ 0.3528158366680145, 10.216700553894043 ], [ 0.3593168258666992, 10.2501220703125 ], [ 0.3658177852630615, 10.283543586730957 ], [ 0.3723187744617462, 10.316965103149414 ], [ 0.3804450035095215, 10.35874080657959 ], [ 0.3853207528591156, 10.383807182312012 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/05_root_path/meta.json ================================================ { "text": "A person is casually walking forward slowly", "duration": 10.0, "num_samples": 1, "seed": 42, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/06_root_waypoints/constraints.json ================================================ [ { "type": "root2d", "frame_indices": [ 0, 90, 180 ], "smooth_root_2d": [ [ 0.0, -0.013232914730906487 ], [ -1.1690130233764648, 1.5332785844802856 ], [ 0.738669753074646, 1.4469488859176636 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/06_root_waypoints/meta.json ================================================ { "text": "A person is doing a hip hop dance while moving around", "duration": 6.033333333333333, "num_samples": 1, "seed": 42, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/07_mixed_constraints/constraints.json ================================================ [ { "type": "fullbody", "frame_indices": [ 108 ], "local_joints_rot": [ [ [ -0.035887543112039566, -0.02776639349758625, -0.005372282117605209 ], [ 0.06515975296497345, -0.010784560814499855, 0.006556123960763216 ], [ -0.06292378902435303, -0.05156821012496948, -0.009085050784051418 ], [ 0.11570766568183899, -0.0793282613158226, -0.03867234289646149 ], [ 0.09106606245040894, 0.06571822613477707, 0.002558206906542182 ], [ -0.06086159870028496, 0.10295507311820984, 0.02592187374830246 ], [ -0.15437740087509155, 0.16596992313861847, 0.009326435625553131 ], [ -0.0005251984694041312, 0.0018051519291475415, -9.946066711563617e-05 ], [ -0.184775248169899, -0.064349465072155, 0.00573313795030117 ], [ -0.18454650044441223, 
0.068090058863163, -0.005659883841872215 ], [ 0.20501427352428436, -0.14578332006931305, -0.04773213341832161 ], [ 0.26504039764404297, -0.16855353116989136, -1.0829373598098755 ], [ 0.006512798834592104, -0.6961542367935181, -0.011537229642271996 ], [ 0.07062757760286331, 0.03925099968910217, -0.027518808841705322 ], [ 0.14896969497203827, 0.29287680983543396, -0.2919791340827942 ], [ 0.009383739903569221, 0.0508926659822464, -1.056564450263977 ], [ 0.11172245442867279, 0.12029653787612915, -0.12930497527122498 ], [ -0.41130027174949646, -0.5924108028411865, -0.0006285393028520048 ], [ 0.006594705395400524, 0.4732210040092468, -0.002528452081605792 ], [ -0.32021215558052063, -0.25638389587402344, -0.3734903335571289 ], [ 0.09024477005004883, -0.2926441431045532, 0.2660353481769562 ], [ -0.09575983881950378, -0.055268142372369766, 0.8844737410545349 ], [ -0.0118059441447258, 0.07546520978212357, 0.0746397078037262 ], [ 0.8310757875442505, -0.012923321686685085, 0.004925338551402092 ], [ 0.03474503755569458, -0.23956389725208282, -0.16712959110736847 ], [ -0.09206951409578323, -0.03187529370188713, 0.027407124638557434 ], [ -0.2677958309650421, 0.11606352031230927, 0.036957308650016785 ], [ 0.394832044839859, -0.0007178321247920394, 0.0004849981633014977 ], [ -0.09032224863767624, -0.14483025670051575, -0.015989331528544426 ], [ -0.0217722300440073, 0.01900928094983101, -0.025495363399386406 ] ] ], "root_positions": [ [ -0.09470777958631516, 0.9947724342346191, -3.980208396911621 ] ], "smooth_root_2d": [ [ -0.09470777958631516, -3.980208396911621 ] ] }, { "type": "root2d", "frame_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151 ], "smooth_root_2d": [ [ -0.022358937188982964, 0.03532936051487923 ], [ -0.024468135088682175, -0.0013195642968639731 ], [ -0.02657654881477356, -0.037969205528497696 ], [ -0.028683679178357124, -0.07462010532617569 ], [ -0.030789025127887726, -0.11127285659313202 ], [ -0.032892078161239624, -0.14792808890342712 ], [ -0.03499194607138634, -0.184586301445961 ], [ -0.03708736225962639, -0.2212478667497635 ], [ -0.03917701542377472, -0.25791314244270325 ], [ -0.04125956818461418, -0.2945826053619385 ], [ -0.04333365708589554, -0.3312567174434662 ], [ -0.045397885143756866, -0.3679359555244446 ], [ -0.04745082929730415, -0.4046209156513214 ], [ -0.04949106276035309, -0.44131216406822205 ], [ -0.05151714012026787, -0.4780103266239166 ], [ -0.05352761223912239, -0.5147159695625305 ], [ -0.05552104488015175, -0.551429808139801 ], [ -0.05749599635601044, -0.5881525278091431 ], [ -0.059451062232255936, -0.6248847842216492 ], [ -0.061384834349155426, -0.6616273522377014 ], [ -0.06329593807458878, -0.6983808875083923 ], [ -0.06518300622701645, -0.7351461052894592 ], [ -0.06704472005367279, -0.7719237804412842 ], [ -0.06887973845005035, -0.8087146878242493 ], [ -0.07068677246570587, -0.8455195426940918 ], [ -0.07246451079845428, -0.8823391795158386 ], [ -0.07421167194843292, 
-0.9191742539405823 ], [ -0.07592695951461792, -0.9560256004333496 ], [ -0.07760907709598541, -0.9928940534591675 ], [ -0.07925672084093094, -1.029780387878418 ], [ -0.08086856454610825, -1.0666853189468384 ], [ -0.0824432522058487, -1.1036096811294556 ], [ -0.08397942036390305, -1.1405543088912964 ], [ -0.08547566086053848, -1.1775201559066772 ], [ -0.08693055063486099, -1.2145079374313354 ], [ -0.08834262937307358, -1.2515183687210083 ], [ -0.08971039950847626, -1.2885526418685913 ], [ -0.09103234112262726, -1.3256113529205322 ], [ -0.09230689704418182, -1.362695574760437 ], [ -0.0935325101017952, -1.3998061418533325 ], [ -0.09470757842063904, -1.4369438886642456 ], [ -0.09583047777414322, -1.4741098880767822 ], [ -0.0968996062874794, -1.5113049745559692 ], [ -0.09791331738233566, -1.548530101776123 ], [ -0.09886999428272247, -1.58578622341156 ], [ -0.0997680053114891, -1.6230742931365967 ], [ -0.10060573369264603, -1.6603953838348389 ], [ -0.1013815775513649, -1.6977503299713135 ], [ -0.10209395736455917, -1.7351402044296265 ], [ -0.1027413085103035, -1.7725658416748047 ], [ -0.10332208126783371, -1.8100284337997437 ], [ -0.10383477061986923, -1.8475286960601807 ], [ -0.10427788645029068, -1.8850678205490112 ], [ -0.10464996099472046, -1.9226467609405518 ], [ -0.10494954138994217, -1.9602664709091187 ], [ -0.10517755895853043, -1.997925877571106 ], [ -0.10533731430768967, -2.0356218814849854 ], [ -0.10543208569288254, -2.0733516216278076 ], [ -0.10546516627073288, -2.111111879348755 ], [ -0.10543984919786453, -2.148899555206299 ], [ -0.10535937547683716, -2.1867120265960693 ], [ -0.10522699356079102, -2.224546194076538 ], [ -0.10504589974880219, -2.262399435043335 ], [ -0.10481927543878555, -2.3002686500549316 ], [ -0.10455025732517242, -2.338151216506958 ], [ -0.10424194484949112, -2.376044511795044 ], [ -0.10389743000268936, -2.4139459133148193 ], [ -0.10351976752281189, -2.451852560043335 ], [ -0.10311200469732285, -2.4897620677948 ], [ -0.10267717391252518, -2.5276718139648438 ], [ -0.10221832990646362, -2.5655791759490967 ], [ -0.10173854231834412, -2.6034812927246094 ], [ -0.10124091058969498, -2.64137601852417 ], [ -0.10072856396436691, -2.67926025390625 ], [ -0.100204698741436, -2.7171311378479004 ], [ -0.09967257082462311, -2.754986047744751 ], [ -0.09913549572229385, -2.7928221225738525 ], [ -0.09859687089920044, -2.8306362628936768 ], [ -0.09806016832590103, -2.8684253692626953 ], [ -0.09752892702817917, -2.906186103820801 ], [ -0.09700676798820496, -2.943915367126465 ], [ -0.09649737179279327, -2.98160982131958 ], [ -0.09600447863340378, -3.019265651702881 ], [ -0.09553186595439911, -3.056879758834839 ], [ -0.09508336335420609, -3.0944483280181885 ], [ -0.09466280788183212, -3.131967782974243 ], [ -0.09427405893802643, -3.1694345474243164 ], [ -0.09392096847295761, -3.2068448066711426 ], [ -0.09360739588737488, -3.244194984436035 ], [ -0.09333716332912445, -3.2814812660217285 ], [ -0.09311125427484512, -3.3187034130096436 ], [ -0.09292776882648468, -3.355863571166992 ], [ -0.0927848145365715, -3.3929643630981445 ], [ -0.09268050640821457, -3.4300084114074707 ], [ -0.09261301904916763, -3.4669981002807617 ], [ -0.09258053451776505, -3.5039358139038086 ], [ -0.09258133918046951, -3.5408236980438232 ], [ -0.09261377900838852, -3.5776638984680176 ], [ -0.0926763191819191, -3.6144583225250244 ], [ -0.09276753664016724, -3.6512088775634766 ], [ -0.09288612008094788, -3.687917470932007 ], [ -0.0930309146642685, -3.7245850563049316 ], [ -0.09320087730884552, -3.7612133026123047 ], [ 
-0.09339512139558792, -3.7978031635284424 ], [ -0.09361287951469421, -3.8343558311462402 ], [ -0.09385351091623306, -3.8708720207214355 ], [ -0.09411647915840149, -3.9073524475097656 ], [ -0.09440135210752487, -3.9437978267669678 ], [ -0.09470777958631516, -3.980208396911621 ], [ -0.09503547102212906, -4.016584873199463 ], [ -0.09538418799638748, -4.052927494049072 ], [ -0.09575372189283371, -4.089236736297607 ], [ -0.09614387899637222, -4.125512599945068 ], [ -0.0965544655919075, -4.1617560386657715 ], [ -0.09698529541492462, -4.197966575622559 ], [ -0.09743614494800568, -4.234145641326904 ], [ -0.09790677577257156, -4.27029275894165 ], [ -0.09839694201946259, -4.306408882141113 ], [ -0.09890634566545486, -4.342494487762451 ], [ -0.09943470358848572, -4.378549575805664 ], [ -0.09998169541358948, -4.41457462310791 ], [ -0.10054702311754227, -4.450570583343506 ], [ -0.10113038867712021, -4.486537456512451 ], [ -0.10173150897026062, -4.522475242614746 ], [ -0.1023501306772232, -4.558384895324707 ], [ -0.10298605263233185, -4.594265937805176 ], [ -0.10363911837339401, -4.6301188468933105 ], [ -0.10430921614170074, -4.665942668914795 ], [ -0.10499630123376846, -4.701738357543945 ], [ -0.10570038110017776, -4.737504482269287 ], [ -0.10642150044441223, -4.7732415199279785 ], [ -0.10715975612401962, -4.808948040008545 ], [ -0.10791526734828949, -4.844624042510986 ], [ -0.10868816822767258, -4.880269527435303 ], [ -0.10947857797145844, -4.915882587432861 ], [ -0.11028657108545303, -4.9514641761779785 ], [ -0.11111218482255936, -4.98701286315918 ], [ -0.11195536702871323, -5.022529602050781 ], [ -0.1128159612417221, -5.058013439178467 ], [ -0.11369368433952332, -5.093465328216553 ], [ -0.11458808928728104, -5.128885746002197 ], [ -0.11549859493970871, -5.164275646209717 ], [ -0.11642441153526306, -5.199635982513428 ], [ -0.11736457794904709, -5.234969615936279 ], [ -0.11831795424222946, -5.270277500152588 ], [ -0.11928320676088333, -5.305562496185303 ], [ -0.12025882303714752, -5.340827941894531 ], [ -0.12124315649271011, -5.3760762214660645 ], [ -0.12223441153764725, -5.41131067276001 ], [ -0.12323068082332611, -5.446536064147949 ], [ -0.12448007613420486, -5.4905595779418945 ], [ -0.1252303272485733, -5.516972541809082 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/07_mixed_constraints/meta.json ================================================ { "text": "A person walking backward points to the right side with their right hand", "duration": 5.066666666666666, "num_samples": 1, "seed": 49, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/08_stylized_text/meta.json ================================================ { "text": "A zombie with their left arm extended forward walks with an uneven gait at a slow pace.", "duration": 4.033333333333333, "num_samples": 1, "seed": 42, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/skeletons/g1skel34/xml/g1.xml ================================================ > ================================================ FILE: kimodo/assets/skeletons/somaskel77/somaskel77_standard_tpose.bvh ================================================ HIERARCHY ROOT Root { OFFSET 0.0 0.0 0.0 CHANNELS 6 Xposition Yposition Zposition Zrotation Yrotation Xrotation 
JOINT Hips { OFFSET 0.0 100.0 0.0 CHANNELS 6 Xposition Yposition Zposition Zrotation Yrotation Xrotation JOINT Spine1 { OFFSET -0.013727 5.003763 -0.053727 CHANNELS 3 Zrotation Yrotation Xrotation JOINT Spine2 { OFFSET -0.0 7.125301 -0.029825 CHANNELS 3 Zrotation Yrotation Xrotation JOINT Chest { OFFSET -1e-06 7.550063 -0.815971 CHANNELS 3 Zrotation Yrotation Xrotation JOINT Neck1 { OFFSET -0.181677 26.311295 -0.553348 CHANNELS 3 Zrotation Yrotation Xrotation JOINT Neck2 { OFFSET -3e-06 7.709397 2.302585 CHANNELS 3 Zrotation Yrotation Xrotation JOINT Head { OFFSET -5e-06 6.128916 1.953709 CHANNELS 3 Zrotation Yrotation Xrotation JOINT HeadEnd { OFFSET 0.003598 16.065403 -1.835379 CHANNELS 3 Zrotation Yrotation Xrotation } JOINT Jaw { OFFSET 0.002637 0.475592 3.094941 CHANNELS 3 Zrotation Yrotation Xrotation } JOINT LeftEye { OFFSET 3.206381 5.380205 7.586883 CHANNELS 3 Zrotation Yrotation Xrotation } JOINT RightEye { OFFSET -3.22244 5.361869 7.558234 CHANNELS 3 Zrotation Yrotation Xrotation } } } } JOINT LeftShoulder { OFFSET 1.621652 23.237164 5.113413 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftArm { OFFSET 14.919846 2e-06 -5.502326 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftForeArm { OFFSET 28.739307 0.0 -0.002588 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHand { OFFSET 27.093981 -1e-06 0.002609 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandThumb1 { OFFSET 2.276482 -1.392045 3.191413 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandThumb2 { OFFSET 4.012836 -1.828127 1.641654 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandThumb3 { OFFSET 2.798515 0.0 -3e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandThumbEnd { OFFSET 3.180793 -4e-06 4e-06 CHANNELS 3 Zrotation Yrotation Xrotation } } } } JOINT LeftHandIndex1 { OFFSET 3.247555 -0.531998 2.296169 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandIndex2 { OFFSET 6.364578 0.01206 0.1786 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandIndex3 { OFFSET 3.662364 0.0 0.0 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandIndex4 { OFFSET 2.329242 4e-06 4e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandIndexEnd { OFFSET 2.759615 -0.180537 -0.113024 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT LeftHandMiddle1 { OFFSET 3.163495 0.240981 1.000332 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandMiddle2 { OFFSET 6.19078 -0.259278 -1.002548 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandMiddle3 { OFFSET 4.35652 -4e-06 -1e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandMiddle4 { OFFSET 2.996877 -8e-06 0.0 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandMiddleEnd { OFFSET 2.304287 -0.294569 -0.031741 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT LeftHandRing1 { OFFSET 2.882643 -0.053652 -0.322543 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandRing2 { OFFSET 5.854541 -0.486202 -1.373841 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandRing3 { OFFSET 4.350578 0.0 3e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandRing4 { OFFSET 2.651321 7e-06 2e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandRingEnd { OFFSET 1.936105 0.077687 -7.1e-05 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT LeftHandPinky1 { OFFSET 2.8655 -0.310005 -1.600378 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandPinky2 { OFFSET 5.087849 -1.331141 -1.77123 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandPinky3 { OFFSET 3.070974 4e-06 0.0 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandPinky4 
{ OFFSET 1.549672 0.0 1e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandPinkyEnd { OFFSET 1.944893 -0.157802 0.057219 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } } } } } JOINT RightShoulder { OFFSET -1.380118 23.180309 5.214158 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightArm { OFFSET -15.037196 1.2e-05 -5.545604 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightForeArm { OFFSET -28.736639 2e-06 -0.002597 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHand { OFFSET -27.133619 -0.0 0.002613 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandThumb1 { OFFSET -2.274032 -1.383988 3.163127 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandThumb2 { OFFSET -4.011429 -1.827466 1.640914 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandThumb3 { OFFSET -2.794935 -4e-06 -3e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandThumbEnd { OFFSET -3.183852 4e-06 1e-06 CHANNELS 3 Zrotation Yrotation Xrotation } } } } JOINT RightHandIndex1 { OFFSET -3.253266 -0.520057 2.282866 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandIndex2 { OFFSET -6.341917 0.012471 0.178266 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandIndex3 { OFFSET -3.654871 -8e-06 -0.0 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandIndex4 { OFFSET -2.327586 0.0 1e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandIndexEnd { OFFSET -2.76179 -0.180656 -0.113078 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT RightHandMiddle1 { OFFSET -3.168106 0.246593 1.00103 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandMiddle2 { OFFSET -6.180828 -0.258836 -1.000895 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandMiddle3 { OFFSET -4.348901 0.0 -0.0 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandMiddle4 { OFFSET -3.00024 -4e-06 -2e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandMiddleEnd { OFFSET -2.30252 -0.29437 -0.031706 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT RightHandRing1 { OFFSET -2.88569 -0.067952 -0.308858 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandRing2 { OFFSET -5.854198 -0.48613 -1.373731 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandRing3 { OFFSET -4.33881 -4e-06 -0.0 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandRing4 { OFFSET -2.654903 -4e-06 4e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandRingEnd { OFFSET -1.933568 0.077527 -5.2e-05 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT RightHandPinky1 { OFFSET -2.866425 -0.342796 -1.584145 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandPinky2 { OFFSET -5.091371 -1.332055 -1.772385 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandPinky3 { OFFSET -3.062664 -4e-06 1e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandPinky4 { OFFSET -1.546529 4e-06 -2e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandPinkyEnd { OFFSET -1.945119 -0.157718 0.057211 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } } } } } } } } JOINT LeftLeg { OFFSET 10.043214 -8.434526 2.595655 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftShin { OFFSET -1e-06 -43.221752 -0.802913 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftFoot { OFFSET 1e-06 -42.155094 -3.481523 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftToeBase { OFFSET 0.0 -5.059472 13.231529 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftToeEnd { OFFSET -0.009607 -1.647619 6.513017 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT RightLeg { OFFSET -10.047278 -8.29526 2.620317 CHANNELS 3 Zrotation Yrotation 
Xrotation JOINT RightShin { OFFSET 1e-06 -43.362206 -0.805556 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightFoot { OFFSET 2e-06 -42.117393 -3.478398 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightToeBase { OFFSET -0.0 -5.079609 13.284196 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightToeEnd { OFFSET 0.009532 -1.634378 6.460591 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } } } MOTION Frames: 1 Frame Time: 0.03333333333333333 0.0 0.0 0.0 0.0 0.0 0.0 0.0 100.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ================================================ FILE: kimodo/assets.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from pathlib import Path PACKAGE_ROOT = Path(__file__).resolve().parent ASSETS_ROOT = PACKAGE_ROOT / "assets" DEMO_ASSETS_ROOT = ASSETS_ROOT / "demo" DEMO_EXAMPLES_ROOT = DEMO_ASSETS_ROOT / "examples" SKELETONS_ROOT = ASSETS_ROOT / "skeletons" SOMA_ASSETS_ROOT = ASSETS_ROOT / "SOMA" def skeleton_asset_path(*parts: str) -> Path: return SKELETONS_ROOT.joinpath(*parts) def demo_asset_path(*parts: str) -> Path: return DEMO_ASSETS_ROOT.joinpath(*parts) ================================================ FILE: kimodo/constraints.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Constraint sets for conditioning motion generation (root 2D, full body, end-effectors).""" from typing import Optional, Union import torch from torch import Tensor from kimodo.motion_rep.feature_utils import compute_heading_angle from kimodo.skeleton import SkeletonBase, SOMASkeleton30, SOMASkeleton77 from kimodo.tools import ensure_batched, load_json, save_json from .geometry import axis_angle_to_matrix, matrix_to_axis_angle def _convert_constraint_local_rots_to_skeleton(local_rot_mats: Tensor, skeleton: SkeletonBase) -> Tensor: """Convert loaded local rotation matrices to match the skeleton's joint count. Handles SOMA 30↔77: constraint files may have been saved with 30 or 77 joints while the session skeleton (e.g. from the SOMA30 model) uses SOMASkeleton77. 
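Any other joint-count mismatch raises a ValueError, as only the SOMA 30↔77 mapping is implemented.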
""" n_joints = local_rot_mats.shape[-3] skeleton_joints = skeleton.nbjoints if n_joints == skeleton_joints: return local_rot_mats if n_joints == 77 and skeleton_joints == 30 and isinstance(skeleton, SOMASkeleton30): return skeleton.from_SOMASkeleton77(local_rot_mats) if n_joints == 30 and skeleton_joints == 77 and isinstance(skeleton, SOMASkeleton77): skel30 = SOMASkeleton30() return skel30.to_SOMASkeleton77(local_rot_mats) raise ValueError( f"Constraint joint count ({n_joints}) does not match skeleton joint count " f"({skeleton_joints}). Only SOMA 30↔77 conversion is supported." ) def create_pairs(tensor_A: Tensor, tensor_B: Tensor) -> Tensor: """Form all (a, b) pairs from two 1D tensors; output shape (len(A)*len(B), 2).""" pairs = torch.stack( ( tensor_A[:, None].expand(-1, len(tensor_B)), tensor_B.expand(len(tensor_A), -1), ), dim=-1, ).reshape(-1, 2) return pairs def compute_global_heading(global_joints_positions: Tensor, skeleton: SkeletonBase) -> Tensor: """Compute global root heading (cos, sin) from global joint positions using skeleton.""" root_heading_angle = compute_heading_angle(global_joints_positions, skeleton) global_root_heading = torch.stack([torch.cos(root_heading_angle), torch.sin(root_heading_angle)], dim=-1) return global_root_heading def _tensor_to( t: Tensor, device: Optional[Union[str, torch.device]] = None, dtype: Optional[torch.dtype] = None, ) -> Tensor: """Move tensor to device and/or dtype. Returns same tensor if no args. """ if device is not None and dtype is not None: return t.to(device=device, dtype=dtype) if device is not None: return t.to(device=device) if dtype is not None: return t.to(dtype=dtype) return t class Root2DConstraintSet: """Constraint set fixing root (x, z) trajectory and optionally global heading on given frames.""" name = "root2d" def __init__( self, skeleton: SkeletonBase, frame_indices: Tensor, smooth_root_2d: Tensor, to_crop: bool = False, global_root_heading: Optional[Tensor] = None, ) -> None: self.skeleton = skeleton # if we pass the full smooth root 3D as input if smooth_root_2d.shape[-1] == 3: smooth_root_2d = smooth_root_2d[..., [0, 1]] if to_crop: smooth_root_2d = smooth_root_2d[frame_indices] if global_root_heading is not None: global_root_heading = global_root_heading[frame_indices] else: assert len(smooth_root_2d) == len( frame_indices ), "The number of smooth root 2d should be match the number of frames" if global_root_heading is not None: assert len(global_root_heading) == len( frame_indices ), "The number of global root heading should be match the number of frames" self.smooth_root_2d = smooth_root_2d self.global_root_heading = global_root_heading self.frame_indices = frame_indices def update_constraints(self, data_dict: dict, index_dict: dict) -> None: """Append this constraint's smooth_root_2d (and optional global_root_heading) to data/index dicts.""" data_dict["smooth_root_2d"].append(self.smooth_root_2d) index_dict["smooth_root_2d"].append(self.frame_indices) if self.global_root_heading is not None: # constraint the global heading data_dict["global_root_heading"].append(self.global_root_heading) index_dict["global_root_heading"].append(self.frame_indices) def crop_move(self, start: int, end: int) -> "Root2DConstraintSet": """Return a new constraint set for the cropped frame range [start, end).""" mask = (self.frame_indices >= start) & (self.frame_indices < end) if self.global_root_heading is not None: masked_global_root_heading = self.global_root_heading[mask] else: masked_global_root_heading = None return 
return Root2DConstraintSet( self.skeleton, self.frame_indices[mask] - start, self.smooth_root_2d[mask], global_root_heading=masked_global_root_heading, ) def get_save_info(self) -> dict: """Return a dict suitable for JSON serialization (frame_indices, smooth_root_2d, optional global_root_heading).""" out = { "type": self.name, "frame_indices": self.frame_indices, "smooth_root_2d": self.smooth_root_2d, } if self.global_root_heading is not None: out["global_root_heading"] = self.global_root_heading return out def to( self, device: Optional[Union[str, torch.device]] = None, dtype: Optional[torch.dtype] = None, ) -> "Root2DConstraintSet": self.smooth_root_2d = _tensor_to(self.smooth_root_2d, device, dtype) self.frame_indices = _tensor_to(self.frame_indices, device, dtype) if self.global_root_heading is not None: self.global_root_heading = _tensor_to(self.global_root_heading, device, dtype) if device is not None and hasattr(self.skeleton, "to"): self.skeleton = self.skeleton.to(device) return self @classmethod def from_dict(cls, skeleton: SkeletonBase, dico: dict) -> "Root2DConstraintSet": """Build a Root2DConstraintSet from a dict (e.g. loaded from JSON).""" device = skeleton.device if hasattr(skeleton, "device") else "cpu" if "global_root_heading" in dico: global_root_heading = torch.tensor(dico["global_root_heading"], device=device) else: global_root_heading = None return cls( skeleton, frame_indices=torch.tensor(dico["frame_indices"]), smooth_root_2d=torch.tensor(dico["smooth_root_2d"], device=device), global_root_heading=global_root_heading, ) class FullBodyConstraintSet: """Constraint set fixing full-body global positions and rotations on given keyframes.""" name = "fullbody" def __init__( self, skeleton: SkeletonBase, frame_indices: Tensor, global_joints_positions: Tensor, global_joints_rots: Tensor, smooth_root_2d: Optional[Tensor] = None, to_crop: bool = False, ): self.skeleton = skeleton self.frame_indices = frame_indices # if we pass the full smooth root 3D as input if smooth_root_2d is not None and smooth_root_2d.shape[-1] == 3: smooth_root_2d = smooth_root_2d[..., [0, 1]] if to_crop: global_joints_positions = global_joints_positions[frame_indices] global_joints_rots = global_joints_rots[frame_indices] if smooth_root_2d is not None: smooth_root_2d = smooth_root_2d[frame_indices] else: assert len(global_joints_positions) == len( frame_indices ), "The number of global positions should match the number of frames" assert len(global_joints_rots) == len( frame_indices ), "The number of global joint rotations should match the number of frames" if smooth_root_2d is not None: assert len(smooth_root_2d) == len( frame_indices ), "The number of smooth root 2d entries (if specified) should match the number of frames" if smooth_root_2d is None: # substitute the smooth root 2d with the real root smooth_root_2d = global_joints_positions[:, skeleton.root_idx, [0, 2]] # root y: from smooth or pelvis is the same self.root_y_pos = global_joints_positions[:, skeleton.root_idx, 1] self.global_joints_positions = global_joints_positions self.global_joints_rots = global_joints_rots self.global_root_heading = compute_global_heading(global_joints_positions, skeleton) self.smooth_root_2d = smooth_root_2d def update_constraints(self, data_dict: dict, index_dict: dict) -> None: """Append global positions, smooth root 2D, root y, and global heading to data/index dicts.""" nbjoints = self.skeleton.nbjoints indices_lst = create_pairs( self.frame_indices, torch.arange(nbjoints, device=self.frame_indices.device), )
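# create_pairs yields one (frame, joint) row per constrained value: with F keyframes on a J-joint skeleton,
# indices_lst has shape (F * J, 2), ordered frame-major, e.g. (f0, 0), (f0, 1), ..., (f0, J - 1), (f1, 0), ...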
data_dict["global_joints_positions"].append( self.global_joints_positions.reshape(-1, 3) ) # flatten the global positions index_dict["global_joints_positions"].append(indices_lst) # global rotations are not used here # as we use smooth root, also constraint the smooth root to get the same full body # maybe keep storing the hips offset, if we smooth it ourselves data_dict["smooth_root_2d"].append(self.smooth_root_2d) index_dict["smooth_root_2d"].append(self.frame_indices) # constraint the y pos of the root data_dict["root_y_pos"].append(self.root_y_pos) index_dict["root_y_pos"].append(self.frame_indices) # constraint the global heading data_dict["global_root_heading"].append(self.global_root_heading) index_dict["global_root_heading"].append(self.frame_indices) def crop_move(self, start: int, end: int) -> "FullBodyConstraintSet": """Return a new FullBodyConstraintSet for the cropped frame range [start, end).""" mask = (self.frame_indices >= start) & (self.frame_indices < end) return FullBodyConstraintSet( self.skeleton, self.frame_indices[mask] - start, self.global_joints_positions[mask], self.global_joints_rots[mask], self.smooth_root_2d[mask], ) def get_save_info(self) -> dict: """Return a dict for JSON save: type, frame_indices, local_joints_rot, root_positions, smooth_root_2d.""" local_joints_rot = self.skeleton.global_rots_to_local_rots(self.global_joints_rots) if isinstance(self.skeleton, SOMASkeleton30): local_joints_rot = self.skeleton.to_SOMASkeleton77(local_joints_rot) local_joints_rot = matrix_to_axis_angle(local_joints_rot) root_positions = self.global_joints_positions[:, self.skeleton.root_idx] return { "type": self.name, "frame_indices": self.frame_indices, "local_joints_rot": local_joints_rot, "root_positions": root_positions, "smooth_root_2d": self.smooth_root_2d, } def to( self, device: Optional[Union[str, torch.device]] = None, dtype: Optional[torch.dtype] = None, ) -> "FullBodyConstraintSet": self.frame_indices = _tensor_to(self.frame_indices, device, dtype) self.global_joints_positions = _tensor_to(self.global_joints_positions, device, dtype) self.global_joints_rots = _tensor_to(self.global_joints_rots, device, dtype) self.root_y_pos = _tensor_to(self.root_y_pos, device, dtype) self.global_root_heading = _tensor_to(self.global_root_heading, device, dtype) self.smooth_root_2d = _tensor_to(self.smooth_root_2d, device, dtype) if device is not None and hasattr(self.skeleton, "to"): self.skeleton = self.skeleton.to(device) return self @classmethod def from_dict(cls, skeleton: SkeletonBase, dico: dict) -> "FullBodyConstraintSet": """Build a FullBodyConstraintSet from a dict (e.g. 
loaded from JSON).""" frame_indices = torch.tensor(dico["frame_indices"]) device = skeleton.device if hasattr(skeleton, "device") else "cpu" local_rot = torch.tensor(dico["local_joints_rot"], device=device) local_rot_mats = axis_angle_to_matrix(local_rot) local_rot_mats = _convert_constraint_local_rots_to_skeleton(local_rot_mats, skeleton) global_joints_rots, global_joints_positions, _ = skeleton.fk( local_rot_mats, torch.tensor(dico["root_positions"], device=device), ) smooth_root_2d = None if "smooth_root_2d" in dico: smooth_root_2d = torch.tensor(dico["smooth_root_2d"], device=device) return cls( skeleton, frame_indices=frame_indices, global_joints_positions=global_joints_positions, global_joints_rots=global_joints_rots, smooth_root_2d=smooth_root_2d, ) class EndEffectorConstraintSet: """Constraint set fixing selected end-effector positions and rotations on given frames.""" name = "end-effector" def __init__( self, skeleton: SkeletonBase, frame_indices: Tensor, global_joints_positions: Tensor, global_joints_rots: Tensor, smooth_root_2d: Optional[Tensor], *, joint_names: list[str], to_crop: bool = False, ) -> None: self.skeleton = skeleton self.frame_indices = frame_indices self.joint_names = joint_names # joint_names are constant for all the frames rot_joint_names, pos_joint_names = self.skeleton.expand_joint_names(self.joint_names) # indexing works for motion_rep with smooth root only (contains pelvis index) self.pos_indices = torch.tensor([self.skeleton.bone_index[jname] for jname in pos_joint_names]) self.rot_indices = torch.tensor([self.skeleton.bone_index[jname] for jname in rot_joint_names]) # if we pass the full smooth root 3D as input if smooth_root_2d is not None and smooth_root_2d.shape[-1] == 3: smooth_root_2d = smooth_root_2d[..., [0, 1]] if to_crop: global_joints_positions = global_joints_positions[frame_indices] global_joints_rots = global_joints_rots[frame_indices] if smooth_root_2d is not None: smooth_root_2d = smooth_root_2d[frame_indices] else: assert len(global_joints_positions) == len( frame_indices ), "The number of global positions should be match the number of frames" assert len(global_joints_rots) == len( frame_indices ), "The number of global joint rotations should be match the number of frames" if smooth_root_2d is not None: assert len(smooth_root_2d) == len( frame_indices ), "The number of smooth root 2d (if specified) should be match the number of frames" if smooth_root_2d is None: # substitute the smooth root 2d with the real root smooth_root_2d = global_joints_positions[:, skeleton.root_idx, [0, 2]] # root y: from smooth or pelvis is the same self.root_y_pos = global_joints_positions[:, skeleton.root_idx, 1] self.global_joints_positions = global_joints_positions self.global_root_heading = compute_global_heading(global_joints_positions, skeleton) self.global_joints_rots = global_joints_rots self.smooth_root_2d = smooth_root_2d def update_constraints(self, data_dict: dict, index_dict: dict) -> None: """Append constrained joint positions/rots, smooth root 2D, root y, and heading to data/index dicts.""" crop_frames_indexing = torch.arange(len(self.frame_indices), device=self.frame_indices.device) # constraint positions pos_indices_real = create_pairs( self.frame_indices, self.pos_indices, ) pos_indices_crop = create_pairs( crop_frames_indexing, self.pos_indices, ) data_dict["global_joints_positions"].append(self.global_joints_positions[tuple(pos_indices_crop.T)]) index_dict["global_joints_positions"].append(pos_indices_real) # constraint rotations 
rot_indices_real = create_pairs( self.frame_indices, self.rot_indices, ) rot_indices_crop = create_pairs( crop_frames_indexing, self.rot_indices, ) data_dict["global_joints_rots"].append(self.global_joints_rots[tuple(rot_indices_crop.T)]) index_dict["global_joints_rots"].append(rot_indices_real) # as we use smooth root, also constrain the smooth root to get the same full body # maybe keep storing the hips offset, if we smooth it ourselves data_dict["smooth_root_2d"].append(self.smooth_root_2d) index_dict["smooth_root_2d"].append(self.frame_indices) # constrain the y pos of the root data_dict["root_y_pos"].append(self.root_y_pos) index_dict["root_y_pos"].append(self.frame_indices) # constrain the global heading data_dict["global_root_heading"].append(self.global_root_heading) index_dict["global_root_heading"].append(self.frame_indices) def crop_move(self, start: int, end: int) -> "EndEffectorConstraintSet": """Return a new EndEffectorConstraintSet for the cropped frame range [start, end).""" mask = (self.frame_indices >= start) & (self.frame_indices < end) cls = type(self) kwargs = {} if not hasattr(cls, "joint_names"): kwargs["joint_names"] = self.joint_names return cls( self.skeleton, self.frame_indices[mask] - start, self.global_joints_positions[mask], self.global_joints_rots[mask], self.smooth_root_2d[mask], **kwargs, ) def get_save_info(self) -> dict: """Return a dict for JSON save: type, frame_indices, local_joints_rot, root_positions, smooth_root_2d, joint_names.""" local_joints_rot = self.skeleton.global_rots_to_local_rots(self.global_joints_rots) if isinstance(self.skeleton, SOMASkeleton30): local_joints_rot = self.skeleton.to_SOMASkeleton77(local_joints_rot) local_joints_rot = matrix_to_axis_angle(local_joints_rot) root_positions = self.global_joints_positions[:, self.skeleton.root_idx] output = { "type": self.name, "frame_indices": self.frame_indices, "local_joints_rot": local_joints_rot, "root_positions": root_positions, "smooth_root_2d": self.smooth_root_2d, } if not hasattr(self.__class__, "joint_names"): # save the joint_names for this base class # but not for children output["joint_names"] = self.joint_names return output def to( self, device: Optional[Union[str, torch.device]] = None, dtype: Optional[torch.dtype] = None, ) -> "EndEffectorConstraintSet": self.frame_indices = _tensor_to(self.frame_indices, device, dtype) self.pos_indices = _tensor_to(self.pos_indices, device, dtype) self.rot_indices = _tensor_to(self.rot_indices, device, dtype) self.root_y_pos = _tensor_to(self.root_y_pos, device, dtype) self.global_joints_positions = _tensor_to(self.global_joints_positions, device, dtype) self.global_root_heading = _tensor_to(self.global_root_heading, device, dtype) self.global_joints_rots = _tensor_to(self.global_joints_rots, device, dtype) self.smooth_root_2d = _tensor_to(self.smooth_root_2d, device, dtype) if device is not None and hasattr(self.skeleton, "to"): self.skeleton = self.skeleton.to(device) return self
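# In crop_move, get_save_info and from_dict, hasattr(cls, "joint_names") distinguishes this generic base class,
# where joint_names is a per-instance argument that must be carried over, from the fixed subclasses below
# (LeftHand, RightHand, LeftFoot, RightFoot), whose joint_names are class attributes.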
loaded from JSON).""" frame_indices = torch.tensor(dico["frame_indices"]) device = skeleton.device if hasattr(skeleton, "device") else "cpu" local_rot = torch.tensor(dico["local_joints_rot"], device=device) local_rot_mats = axis_angle_to_matrix(local_rot) local_rot_mats = _convert_constraint_local_rots_to_skeleton(local_rot_mats, skeleton) global_joints_rots, global_joints_positions, _ = skeleton.fk( local_rot_mats, torch.tensor(dico["root_positions"], device=device), ) smooth_root_2d = None if "smooth_root_2d" in dico: smooth_root_2d = torch.tensor(dico["smooth_root_2d"], device=device) kwargs = {} if not hasattr(cls, "joint_names"): kwargs["joint_names"] = dico["joint_names"] return cls( skeleton, frame_indices=frame_indices, global_joints_positions=global_joints_positions, global_joints_rots=global_joints_rots, smooth_root_2d=smooth_root_2d, **kwargs, ) class LeftHandConstraintSet(EndEffectorConstraintSet): """End-effector constraint for the left hand only.""" name = "left-hand" joint_names: list[str] = ["LeftHand"] def __init__(self, *args, **kwargs: dict): super().__init__(*args, joint_names=self.joint_names, **kwargs) class RightHandConstraintSet(EndEffectorConstraintSet): """End-effector constraint for the right hand only.""" name = "right-hand" joint_names: list[str] = ["RightHand"] def __init__(self, *args, **kwargs: dict): super().__init__(*args, joint_names=self.joint_names, **kwargs) class LeftFootConstraintSet(EndEffectorConstraintSet): """End-effector constraint for the left foot only.""" name = "left-foot" joint_names: list[str] = ["LeftFoot"] def __init__(self, *args, **kwargs: dict): super().__init__(*args, joint_names=self.joint_names, **kwargs) class RightFootConstraintSet(EndEffectorConstraintSet): """End-effector constraint for the right foot only.""" name = "right-foot" joint_names: list[str] = ["RightFoot"] def __init__(self, *args, **kwargs: dict): super().__init__(*args, joint_names=self.joint_names, **kwargs) TYPE_TO_CLASS = { "root2d": Root2DConstraintSet, "fullbody": FullBodyConstraintSet, "left-hand": LeftHandConstraintSet, "right-hand": RightHandConstraintSet, "left-foot": LeftFootConstraintSet, "right-foot": RightFootConstraintSet, "end-effector": EndEffectorConstraintSet, } def load_constraints_lst( path_or_data: str | list, skeleton: SkeletonBase, device: Optional[Union[str, torch.device]] = None, dtype: Optional[torch.dtype] = None, ): """Load a list of constraints from JSON path or list of dicts. Args: path_or_data: Path to constraints.json or list of constraint dicts. skeleton: Skeleton instance (used for from_dict). device: If set, move all constraint tensors and skeleton to this device. dtype: If set, cast constraint tensors to this dtype. """ if isinstance(path_or_data, str): saved = load_json(path_or_data) else: saved = path_or_data constraints_lst = [] for el in saved: cls = TYPE_TO_CLASS[el["type"]] c = cls.from_dict(skeleton, el) if device is not None or dtype is not None: c.to(device=device, dtype=dtype) constraints_lst.append(c) return constraints_lst def save_constraints_lst(path: str, constraints_lst: list) -> list | None: """Save a list of constraint sets to a JSON file. Returns None if list is empty. """ if not constraints_lst: print("The constraints lst is empty. 
Skip saving") return to_save = [] def tensor_to_list(obj): """Recursively convert tensors to lists for JSON serialization.""" if isinstance(obj, Tensor): return obj.cpu().tolist() elif isinstance(obj, dict): return {k: tensor_to_list(v) for k, v in obj.items()} elif isinstance(obj, list): return [tensor_to_list(v) for v in obj] else: return obj for constraint in constraints_lst: constraint_info = constraint.get_save_info() # Convert all tensors to lists for JSON serialization constraint_info = tensor_to_list(constraint_info) to_save.append(constraint_info) save_json(path, to_save) print(f"Saved constraints to {path}") return to_save ================================================ FILE: kimodo/demo/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # ruff: noqa: I001 import argparse from kimodo.model import DEFAULT_MODEL from kimodo.model.registry import resolve_model_name from .app import Demo def main() -> None: parser = argparse.ArgumentParser(description="Run the kimodo demo UI.") parser.add_argument( "--model", type=str, default=DEFAULT_MODEL, help="Default model to load (e.g. Kimodo-SOMA-RP-v1, kimodo-soma-rp, or SOMA).", ) args = parser.parse_args() resolved = resolve_model_name(args.model, "Kimodo") demo = Demo(default_model_name=resolved) demo.run() if __name__ == "__main__": main() ================================================ FILE: kimodo/demo/__main__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Entry point for `python -m kimodo.demo`.""" from kimodo.demo import main if __name__ == "__main__": main() ================================================ FILE: kimodo/demo/app.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import base64 import os import shutil import threading import time from typing import Optional import numpy as np import torch import viser from kimodo.assets import DEMO_ASSETS_ROOT from kimodo.model.load_model import load_model from kimodo.model.registry import resolve_model_name from kimodo.skeleton import SkeletonBase, SOMASkeleton30 from kimodo.tools import load_json from kimodo.viz import viser_utils from kimodo.viz.viser_utils import ( Character, CharacterMotion, EEJointsKeyframeSet, FullbodyKeyframeSet, RootKeyframe2DSet, ) from viser.theme import TitlebarButton, TitlebarConfig, TitlebarImage from . 
import generation, ui from .config import ( DARK_THEME, DEFAULT_CUR_DURATION, DEFAULT_MODEL, DEFAULT_PLAYBACK_SPEED, DEFAULT_PROMPT, DEMO_UI_QUICK_START_MODAL_MD, EXAMPLES_ROOT_DIR, HF_MODE, LIGHT_THEME, MAX_ACTIVE_USERS, MAX_DURATION, MAX_SESSION_MINUTES, MIN_DURATION, MODEL_EXAMPLES_DIRS, MODEL_NAMES, SERVER_NAME, SERVER_PORT, ) from .embedding_cache import CachedTextEncoder from .queue_manager import QueueManager, UserQueue from .state import ClientSession, ModelBundle class Demo: def __init__(self, default_model_name: str = DEFAULT_MODEL): self.device = "cuda:0" if torch.cuda.is_available() else "cpu" print(f"Using device: {self.device}") self.models: dict[str, ModelBundle] = {} self._text_encoder = None resolved = resolve_model_name(default_model_name, "Kimodo") if resolved not in MODEL_NAMES: raise ValueError(f"Unknown model '{default_model_name}'. Expected one of: {MODEL_NAMES}") self.default_model_name = resolved self.ensure_examples_layout() self.load_model(self.default_model_name) # Serialize GPU-bound generation across all clients self._generation_lock = threading.Lock() self._cuda_healthy = True # Per-client sessions self.client_sessions: dict[int, ClientSession] = {} self.start_direction_markers: dict[int, viser_utils.WaypointMesh] = {} self.grid_handles: dict[int, viser.GridHandle] = {} self.server = viser.ViserServer( host=SERVER_NAME, port=SERVER_PORT, label="Kimodo", enable_camera_keyboard_controls=False, # don't move the camera with the arrow keys ) self.server.scene.world_axes.visible = False # used for debugging self.server.scene.set_up_direction("+y") # Register callbacks for session handling self.server.on_client_connect(self.on_client_connect) self.server.on_client_disconnect(self.on_client_disconnect) # HF mode: queue and session limit if HF_MODE: self.user_queue = UserQueue(MAX_ACTIVE_USERS, MAX_SESSION_MINUTES) self.queue_manager = QueueManager( queue=self.user_queue, server=self.server, setup_demo_for_client=self._setup_demo_for_client, cleanup_session=self._cleanup_session_for_client, ) else: self.user_queue = None self.queue_manager = None # create grid and floor self.floor_len = 20.0 # meters def ensure_examples_layout(self) -> None: os.makedirs(EXAMPLES_ROOT_DIR, exist_ok=True) for model_dir in MODEL_EXAMPLES_DIRS.values(): os.makedirs(model_dir, exist_ok=True) for entry in os.listdir(EXAMPLES_ROOT_DIR): if entry in MODEL_EXAMPLES_DIRS: continue src = os.path.join(EXAMPLES_ROOT_DIR, entry) if not os.path.isdir(src): continue dst = os.path.join( MODEL_EXAMPLES_DIRS.get(DEFAULT_MODEL, next(iter(MODEL_EXAMPLES_DIRS.values()))), entry, ) if not os.path.exists(dst): shutil.move(src, dst) def get_examples_base_dir(self, model_name: str, absolute: bool = True) -> str: return MODEL_EXAMPLES_DIRS[model_name] def load_model(self, model_name: str) -> ModelBundle: if model_name in self.models: return self.models[model_name] print(f"Loading model {model_name}...") try: model = load_model( modelname=model_name, device=self.device, text_encoder=self._text_encoder, ) except Exception as e: print(f"Error loading model: {e}\nMake sure text encoder server is running!") raise e if hasattr(model, "text_encoder"): if self._text_encoder is None: self._text_encoder = model.text_encoder model.text_encoder = CachedTextEncoder(model.text_encoder, model_name=model_name) skeleton = model.motion_rep.skeleton if isinstance(skeleton, SOMASkeleton30): skeleton = skeleton.somaskel77.to(model.device) bundle = ModelBundle( model=model, motion_rep=model.motion_rep, skeleton=skeleton, 
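# for SOMA30 models this is the expanded 77-joint SOMASkeleton77 (see the isinstance check above), so downstream visualization gets the full rig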
model_fps=model.motion_rep.fps, ) self.models[model_name] = bundle print(f"Model {model_name} loaded successfully") self.prewarm_embedding_cache(model_name, bundle.model) return bundle def prewarm_embedding_cache(self, model_name: str, model: object) -> None: encoder = getattr(model, "text_encoder", None) if not isinstance(encoder, CachedTextEncoder): return prompt_set = set() prompt_set.add(DEFAULT_PROMPT) examples_dir = MODEL_EXAMPLES_DIRS.get(model_name) if examples_dir and os.path.isdir(examples_dir): for entry in os.listdir(examples_dir): example_dir = os.path.join(examples_dir, entry) if not os.path.isdir(example_dir): continue meta_path = os.path.join(example_dir, "meta.json") if not os.path.exists(meta_path): continue try: meta = load_json(meta_path) except Exception: continue for prompt in meta.get("prompts_text", []): if isinstance(prompt, str): prompt_set.add(prompt) if prompt_set: encoder.prewarm(list(prompt_set)) def build_constraint_tracks( self, client: viser.ClientHandle, skeleton: SkeletonBase ) -> dict[str, viser_utils.ConstraintSet]: return { "Full-Body": FullbodyKeyframeSet( name="Full-Body", server=client, skeleton=skeleton, ), "End-Effectors": EEJointsKeyframeSet( name="End-Effectors", server=client, skeleton=skeleton, ), "2D Root": RootKeyframe2DSet( name="2D Root", server=client, skeleton=skeleton, ), } def set_timeline_defaults(self, timeline, model_fps: float) -> None: timeline.set_defaults( default_text=DEFAULT_PROMPT, default_duration=int(DEFAULT_CUR_DURATION * model_fps - 1), min_duration=int(MIN_DURATION * model_fps - 1), # 2 seconds minimum, max_duration=int( MAX_DURATION * model_fps - 1 # - NB_TRANSITION_FRAMES ), # 10 seconds maximum, minus the transition frames, if needed default_num_frames_zoom=int(1.10 * 10 * model_fps), # a bit more than the max max_frames_zoom=1000, fps=model_fps, ) def _apply_constraint_overlay_visibility(self, session: ClientSession) -> None: """Apply show-all vs show-only-current-frame to constraint overlays.""" only_frame = session.frame_idx if session.show_only_current_constraint else None for constraint in session.constraints.values(): constraint.set_overlay_visibility(only_frame) def set_constraint_tracks_visible(self, session: ClientSession, visible: bool) -> None: timeline = session.client.timeline timeline_data = session.timeline_data if timeline_data.get("constraint_tracks_visible", True) == visible: return with timeline_data["keyframe_update_lock"]: if visible: for track_id, track_info in timeline_data["tracks"].items(): timeline.add_track( track_info["name"], track_type=track_info.get("track_type", "keyframe"), color=track_info.get("color"), height_scale=track_info.get("height_scale", 1.0), uuid=track_id, ) for keyframe_id, keyframe_data in timeline_data["keyframes"].items(): timeline.add_keyframe( track_id=keyframe_data["track_id"], frame=keyframe_data["frame"], value=keyframe_data.get("value"), opacity=keyframe_data.get("opacity", 1.0), locked=keyframe_data.get("locked", False), uuid=keyframe_id, ) for interval_id, interval_data in timeline_data["intervals"].items(): timeline.add_interval( track_id=interval_data["track_id"], start_frame=interval_data["start_frame_idx"], end_frame=interval_data["end_frame_idx"], value=interval_data.get("value"), opacity=interval_data.get("opacity", 1.0), locked=interval_data.get("locked", False), uuid=interval_id, ) else: for track_id in list(timeline_data["tracks"].keys()): timeline.remove_track(track_id) timeline_data["constraint_tracks_visible"] = visible def 
_cleanup_session_for_client(self, client_id: int) -> None: """Remove session and scene state for a client (e.g. on session expiry).""" if client_id in self.client_sessions: del self.client_sessions[client_id] self.start_direction_markers.pop(client_id, None) self.grid_handles.pop(client_id, None) def _setup_demo_for_client(self, client: viser.ClientHandle) -> None: """Initialize scene, GUI, and session state for a client (no modals).""" self.setup_scene(client) model_bundle = self.load_model(self.default_model_name) # Initialize each empty constraint track constraint_tracks = self.build_constraint_tracks(client, model_bundle.skeleton) # Create GUI elements for this client ( gui_elements, timeline_tracks, example_dict, gui_examples_dropdown, gui_save_example_path_text, gui_model_selector, ) = ui.create_gui( demo=self, client=client, model_name=self.default_model_name, model_fps=model_bundle.model_fps, ) timeline_data = { "tracks": timeline_tracks, "tracks_ids": {val["name"]: key for key, val in timeline_tracks.items()}, "keyframes": {}, "intervals": {}, "keyframe_update_lock": threading.Lock(), "keyframe_move_timers": {}, "pending_keyframe_moves": {}, # keyframe_id -> new_frame "constraint_tracks_visible": True, "dense_path_after_release_timer": None, } # Initialize session state cur_duration = DEFAULT_CUR_DURATION max_frame_idx = int(cur_duration * model_bundle.model_fps - 1) session = ClientSession( client=client, gui_elements=gui_elements, motions={}, constraints=constraint_tracks, timeline_data=timeline_data, frame_idx=0, playing=False, playback_speed=DEFAULT_PLAYBACK_SPEED, cur_duration=cur_duration, max_frame_idx=max_frame_idx, updating_motions=False, edit_mode=False, model_name=self.default_model_name, model_fps=model_bundle.model_fps, skeleton=model_bundle.skeleton, motion_rep=model_bundle.motion_rep, examples_base_dir=self.get_examples_base_dir(self.default_model_name, absolute=True), example_dict=example_dict, gui_examples_dropdown=gui_examples_dropdown, gui_save_example_path_text=gui_save_example_path_text, gui_model_selector=gui_model_selector, ) self.client_sessions[client.client_id] = session # Initialize default character for this client self.add_character_motion(client, session.skeleton) def on_client_connect(self, client: viser.ClientHandle) -> None: """Initialize GUI and state for each new client.""" print(f"Client {client.client_id} connected") if HF_MODE and self.queue_manager is not None: self.queue_manager.on_client_connect(client) else: # Show quick start popup when a browser client connects (non-HF mode). 
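# The modal's save_choice key presumably persists the acknowledgement, so returning visitors who clicked the button skip it;
# in HF mode the queue manager drives setup through the _setup_demo_for_client callback wired in __init__ instead.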
with client.gui.add_modal( "Welcome — Quick Start", size="xl", show_close_button=True, save_choice="kimodo.demo.quick_start_ack", ) as modal: client.gui.add_markdown(DEMO_UI_QUICK_START_MODAL_MD) client.gui.add_button("Got it (don't remind me again)").on_click(lambda _event: modal.close()) self._setup_demo_for_client(client) def setup_scene(self, client: viser.ClientHandle) -> None: self.configure_theme(client) client.camera.position = np.array( [2.7417358737841426, 1.8790455698853281, 7.675741569777456], dtype=np.float64, ) client.camera.look_at = np.array([0.0, 0.0, 0.0], dtype=np.float64) client.camera.up_direction = np.array( [-1.1102230246251568e-16, 1.0, 1.3596310734468913e-32], dtype=np.float64, ) client.camera.fov = np.deg2rad(45.0) grid_handle = client.scene.add_grid( "/grid", width=self.floor_len, height=self.floor_len, wxyz=viser.transforms.SO3.from_x_radians(-np.pi / 2.0).wxyz, position=(0.0, 0.0001, 0.0), fade_distance=3 * self.floor_len, section_color=LIGHT_THEME["grid"], infinite_grid=True, ) self.grid_handles[client.client_id] = grid_handle # marker for origin origin_waypoint = viser_utils.WaypointMesh( "/origin_waypoint", client, position=np.array([0.0, 0.0, 0.0]), heading=np.array([0.0, 1.0]), color=(0, 0, 255), ) self.start_direction_markers[client.client_id] = origin_waypoint def on_client_disconnect(self, client: viser.ClientHandle) -> None: """Clean up when client disconnects.""" print(f"Client {client.client_id} disconnected") client_id = client.client_id if HF_MODE and self.queue_manager is not None: self.queue_manager.on_client_disconnect(client_id) self._cleanup_session_for_client(client_id) def set_start_direction_visible(self, client_id: int, visible: bool) -> None: marker = self.start_direction_markers.get(client_id) if marker is None: return marker.set_visible(visible) def client_active(self, client_id: int) -> bool: return client_id in self.client_sessions def add_character_motion( self, client: viser.ClientHandle, skeleton: SkeletonBase, joints_pos: Optional[torch.Tensor] = None, joints_rot: Optional[torch.Tensor] = None, foot_contacts: Optional[torch.Tensor] = None, ) -> None: client_id = client.client_id if not self.client_active(client_id): return session = self.client_sessions[client_id] ci = len(session.motions) character_name = f"character{ci}" # build character skeleton and skinning mesh if "g1" in session.model_name: mesh_mode = "g1_stl" elif "smplx" in session.model_name: mesh_mode = "smplx_skin" elif "soma" in session.model_name: if session.gui_elements.gui_use_soma_layer_checkbox.value: mesh_mode = "soma_layer_skin" else: mesh_mode = "soma_skin" else: raise ValueError("The model name is not recognized for skinning.") new_character = Character( character_name, client, skeleton, create_skeleton_mesh=True, create_skinned_mesh=True, visible_skeleton=False, # don't show immediately visible_skinned_mesh=False, # don't show immediately skinned_mesh_opacity=session.gui_elements.gui_viz_skinned_mesh_opacity_slider.value, show_foot_contacts=session.gui_elements.gui_viz_foot_contacts_checkbox.value, dark_mode=session.gui_elements.gui_dark_mode_checkbox.value, mesh_mode=mesh_mode, gui_use_soma_layer_checkbox=session.gui_elements.gui_use_soma_layer_checkbox, ) # if no motion given, initialize to character default (rest) pose for one frame init_joints_pos, init_joints_rot = new_character.get_pose() if joints_pos is None: joints_pos = init_joints_pos[None].repeat(session.max_frame_idx + 1, 1, 1) if joints_rot is None: joints_rot = 
init_joints_rot[None].repeat(session.max_frame_idx + 1, 1, 1, 1) new_motion = CharacterMotion(new_character, joints_pos, joints_rot, foot_contacts) # save the motion in our dict session.motions[character_name] = new_motion # put the character at the right frame new_motion.set_frame(session.frame_idx) # put them visible with a small delay # so that the set_frame function has time to finish def _set_visibility(): new_motion.character.set_skinned_mesh_visibility(session.gui_elements.gui_viz_skinned_mesh_checkbox.value) new_motion.character.set_skeleton_visibility(session.gui_elements.gui_viz_skeleton_checkbox.value) timer = threading.Timer( 0.2, # 0.2s delay _set_visibility, ) timer.start() def clear_motions(self, client_id: int) -> None: if not self.client_active(client_id): return session = self.client_sessions[client_id] for motion in list(session.motions.values()): motion.clear() session.motions.clear() def compute_model_constraints_lst( self, session: ClientSession, model_bundle: ModelBundle, num_frames: int, ): return generation.compute_model_constraints_lst(session, model_bundle, num_frames, self.device) def check_cuda_health(self) -> bool: """Check if CUDA is still functional. Trigger auto-restart if corrupted. """ if self.device == "cpu": return True try: torch.tensor([1.0], device=self.device) + torch.tensor([1.0], device=self.device) return True except RuntimeError as e: if "device-side assert" in str(e) or "CUDA error" in str(e): if self._cuda_healthy: self._cuda_healthy = False print("FATAL: CUDA context is corrupted (device-side assert). " "The process must be restarted.") self._trigger_restart() return False raise def _trigger_restart(self) -> None: """Exit the process so the HF Space (or systemd/Docker) can restart it.""" import sys print("Initiating automatic restart due to unrecoverable CUDA error...") sys.stdout.flush() sys.stderr.flush() os._exit(1) def generate( self, client: viser.ClientHandle, prompts: list[str], num_frames: list[int], num_samples: int, seed: int, diffusion_steps: int, cfg_weight: Optional[list[float]] = None, cfg_type: Optional[str] = None, postprocess_parameters: Optional[dict] = None, transitions_parameters: Optional[dict] = None, real_robot_rotations: bool = False, ) -> None: if not self._cuda_healthy: raise RuntimeError("CUDA is in a corrupted state. The space is restarting...") locked = self._generation_lock.acquire(blocking=False) if not locked: waiting_notif = client.add_notification( title="Waiting for GPU...", body="Another generation is in progress. 
Yours will start automatically.", loading=True, with_close_button=False, ) self._generation_lock.acquire() waiting_notif.remove() try: session = self.client_sessions[client.client_id] model_bundle = self.load_model(session.model_name) generation.generate( client=client, session=session, model_bundle=model_bundle, prompts=prompts, num_frames=num_frames, num_samples=num_samples, seed=seed, diffusion_steps=diffusion_steps, cfg_weight=cfg_weight, cfg_type=cfg_type, postprocess_parameters=postprocess_parameters, transitions_parameters=transitions_parameters, real_robot_rotations=real_robot_rotations, device=self.device, clear_motions=self.clear_motions, add_character_motion=self.add_character_motion, ) finally: self._generation_lock.release() def set_frame(self, client_id: int, frame_idx: int, update_timeline: bool = True): if not self.client_active(client_id): return session = self.client_sessions[client_id] session.frame_idx = frame_idx if update_timeline: session.client.timeline.set_current_frame(frame_idx) for motion in list(session.motions.values()): motion.set_frame(frame_idx) self._apply_constraint_overlay_visibility(session) def run(self) -> None: update_counter = 0 cuda_check_interval = 300 while True: last_update_time = time.time() if self.models: # the max playback speed is 2x the model fps (from gui_playback_speed_buttons) playback_fps = max(bundle.model_fps for bundle in self.models.values()) * 2.0 else: playback_fps = 60.0 # update each client session independently # copy to a list first to avoid changing size if client disconnects for client_id, session in list(self.client_sessions.items()): update_interval = int(playback_fps / (session.playback_speed * session.model_fps)) new_frame_idx = session.frame_idx if session.playing and update_counter % update_interval == 0: if session.frame_idx >= session.max_frame_idx: new_frame_idx = 0 else: new_frame_idx = session.frame_idx + 1 # make sure the client is still active before updating the frame if self.client_active(client_id): self.set_frame(client_id, new_frame_idx) if update_counter % cuda_check_interval == 0: self.check_cuda_health() time_remaining = max(0, 1.0 / playback_fps - (time.time() - last_update_time)) time.sleep(time_remaining) update_counter += 1 update_counter %= playback_fps # wrap around to 0 every second def configure_theme( self, client: viser.ClientHandle, dark_mode: bool = False, titlebar_dark_mode_checkbox_uuid: str | None = None, ): # Sync grid color with theme (light vs dark) theme = DARK_THEME if dark_mode else LIGHT_THEME grid_handle = self.grid_handles.get(client.client_id) if grid_handle is not None: grid_handle.section_color = theme["grid"] # # setup theme # buttons = ( TitlebarButton( text="Documentation", icon="Description", href="https://research.nvidia.com/labs/sil/projects/kimodo/docs/interactive_demo/index.html", ), TitlebarButton( text="Project Page", icon=None, href="https://research.nvidia.com/labs/sil/projects/kimodo/", ), TitlebarButton( text="Github", icon="GitHub", href="https://github.com/nv-tlabs/kimodo", ), ) assets_dir = DEMO_ASSETS_ROOT logo_light_path = assets_dir / "nvidia_logo.png" logo_dark_path = assets_dir / "nvidia_logo_dark.png" if logo_light_path.exists(): light_b64 = base64.standard_b64encode(logo_light_path.read_bytes()).decode("ascii") dark_b64 = ( base64.standard_b64encode(logo_dark_path.read_bytes()).decode("ascii") if logo_dark_path.exists() else None ) image = TitlebarImage( image_url_light=f"data:image/png;base64,{light_b64}", 
image_url_dark=(f"data:image/png;base64,{dark_b64}" if dark_b64 else None), image_alt="NVIDIA", href="https://www.nvidia.com/", ) else: image = None titlebar_theme = TitlebarConfig(buttons=buttons, image=image, title_text="Kimodo") client.gui.set_panel_label("Kimodo") client.gui.configure_theme( titlebar_content=titlebar_theme, control_layout="floating", # "floating", # ['floating', 'collapsible', 'fixed'] control_width="large", # ['small', 'medium', 'large'] dark_mode=dark_mode, show_logo=False, # hide viser logo on bottom left corner show_share_button=False, titlebar_dark_mode_checkbox_uuid=titlebar_dark_mode_checkbox_uuid, brand_color=(152, 189, 255), # (60, 131, 0), # (R, G, B) tuple ) ================================================ FILE: kimodo/demo/config.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import os from kimodo.assets import DEMO_EXAMPLES_ROOT from kimodo.model.registry import ( AVAILABLE_MODELS, DEFAULT_MODEL, FRIENDLY_NAMES, get_datasets, get_model_info, get_models_for_dataset_skeleton, get_short_key_from_display_name, get_skeleton_display_name, get_skeleton_display_names_for_dataset, get_skeleton_key_from_display_name, get_skeletons_for_dataset, get_versions_for_dataset_skeleton, resolve_to_short_key, ) SERVER_NAME = os.environ.get("SERVER_NAME", "0.0.0.0") SERVER_PORT = int(os.environ.get("SERVER_PORT", "7860")) HF_MODE = os.environ.get("HF_MODE", False) # HF mode: user queue and session limit (override via env in Spaces) MAX_ACTIVE_USERS = int(os.environ.get("MAX_ACTIVE_USERS", "5")) MAX_SESSION_MINUTES = float(os.environ.get("MAX_SESSION_MINUTES", "5.0")) DEFAULT_PLAYBACK_SPEED = 1.0 # default start duration is 6.0 sec, but model can handle up to 10 sec DEFAULT_CUR_DURATION = 6.0 DEFAULT_PROMPT = "A person walks forward." MIN_DURATION = 2.0 MAX_DURATION = 10.0 SHOW_TRANSITION_PARAMS = True INIT_POSTPROCESSING = True NB_TRANSITION_FRAMES = 5 LIGHT_THEME = dict( floor=(220, 220, 220), grid=(180, 180, 180), ) # Dark theme: slightly lighter grid and floor for better visibility and less flat black DARK_THEME = dict( floor=(48, 48, 52), grid=(105, 105, 110), ) EXAMPLES_ROOT_DIR = str(DEMO_EXAMPLES_ROOT) # Model list and paths from kimodo registry (all models: Kimodo + TMR) MODEL_NAMES = tuple(AVAILABLE_MODELS) MODEL_EXAMPLES_DIRS = {name: os.path.join(EXAMPLES_ROOT_DIR, name) for name in MODEL_NAMES} # Display labels for backward compatibility (short_key -> display name) MODEL_LABELS = {name: FRIENDLY_NAMES.get(name, f"Model ({name})") for name in MODEL_NAMES} MODEL_LABEL_TO_NAME = {label: name for name, label in MODEL_LABELS.items()} # ----------------------------------------------------------------------------- # Demo UI copy # ----------------------------------------------------------------------------- DEMO_UI_QUICK_START_CORE_MD = """ ### Camera - **Left-drag**: rotate - **Right-drag**: pan - **Scroll**: zoom ### Playback - **Space** to play/pause - **←/→** to step frames, or click the frame number. - **Scroll up/down** in the timeline: move left/right - **Shift + scroll** in the timeline: zoom in/out ### Prompts - **Double-click** a text prompt to edit it. - **Click and drag** the right edge of a prompt box to extend/shorten it. - **Click empty space** to add a prompt. - **Right-click** a prompt to delete it. 
### Generate
- Go to the **Generate** tab to modify options
- It is also possible to **load** examples
- Click **Generate** to generate a motion

### Constraints
- This is **optional**: it should be used after a first generation
- **Click** in the timeline tracks (Full-Body / 2D Root, etc.) to add a constraint.
- **Right-click** on a constraint to delete it.
- To **edit** a constraint:
  - Move playback to the target frame
  - Click **Enter Editing Mode** in the Constraints tab.
"""

DEMO_UI_QUICK_START_MODAL_MD = (
    DEMO_UI_QUICK_START_CORE_MD
    + """
See the **Instructions** tab for the full user manual.
"""
)

DEMO_UI_INSTRUCTIONS_TAB_MD = (
    """
## How to Use This Demo
"""
    + DEMO_UI_QUICK_START_CORE_MD
    + """
---

### Generating Motion (step-by-step)
1. **Edit the text prompts** in the timeline (e.g., "A person walks forward.")
2. **Modify the duration** by moving the right edge of each prompt (2–10 seconds)
3. **Add constraints** (optional) to control the motion:
   - Click **Enter Editing Mode** to adjust the character pose
   - Use the timeline to place keyframes or intervals in constraint tracks (see below)
4. **Click Generate** to create the motion
5. If generating multiple samples, **click on a mesh** to select which one to keep

### Timeline Editing

**Adding Constraints:**
1. Click anywhere on the timeline to add a keyframe at that frame. The keyframe is created based on the current character motion.
2. Ctrl/Cmd+click+drag to add an interval constraint, or expand a keyframe into an interval
3. Enter editing mode with the **Enter Editing Mode** button to adjust the character pose before/after adding constraints.

**Constraint Types:**
- **Full-Body**: constrains the entire character pose
- **2D Root**: constrains the character's path on the ground plane
  - Enable **Densify** to create a continuous path
- **End-Effectors**: constrains hand and foot positions
  - Use separate tracks for Left/Right Hand/Foot

**Moving & Deleting:**
- **Drag keyframes/intervals** to move them to different frames
- **Right-click** a keyframe or interval to delete it
- Use **Clear All Constraints** to remove everything

**Tips:**
- The posing skeleton becomes visible in editing mode for precise positioning
- Use **Snap to constraint** to align the current frame to a constraint

### Saving & Loading
You can save the current constraints or the current motion and load them later from the Load/Save menu.
Saving an **Example** will save the full constraints, motion, and generation metadata.

### Visualization Options
Switch to the **Visualize** tab to:
- Toggle mesh and skeleton visibility
- Adjust mesh opacity
- Show/hide foot contact indicators
- Switch between light and dark modes
"""
)


================================================
FILE: kimodo/demo/embedding_cache.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 import contextlib import contextvars import hashlib import json import os import threading import time from collections import OrderedDict from dataclasses import dataclass from typing import Iterable, Optional import numpy as np import torch from kimodo.sanitize import sanitize_texts _ACTIVE_SESSION = contextvars.ContextVar("kimodo_demo_active_session", default=None) @dataclass class CacheStats: hits: int = 0 misses: int = 0 disk_hits: int = 0 class EmbeddingCache: """Disk-backed text embedding cache with a small in-memory LRU.""" def __init__( self, *, model_name: str, encoder_id: str, base_dir: Optional[str] = None, max_mem_entries: int = 128, ) -> None: cache_root = base_dir or os.environ.get( "kimodo_EMBED_CACHE_DIR", os.path.join("~", ".cache", "kimodo_demo", "embeddings"), ) self.base_dir = os.path.expanduser(cache_root) self.model_name = model_name self.encoder_id = encoder_id self.max_mem_entries = max_mem_entries self.stats = CacheStats() self._lock = threading.Lock() self._mem_cache: OrderedDict[str, np.ndarray] = OrderedDict() self._index = {} self._index_loaded = False def _model_dir(self) -> str: return os.path.join(self.base_dir, self.model_name) def _index_path(self) -> str: return os.path.join(self._model_dir(), "index.json") def _prewarm_marker_path(self, key: str) -> str: return os.path.join(self._model_dir(), f"prewarm_{key}.json") def has_prewarm_marker(self, key: str) -> bool: return os.path.exists(self._prewarm_marker_path(key)) def write_prewarm_marker(self, key: str, *, prompt_count: int) -> None: os.makedirs(self._model_dir(), exist_ok=True) payload = {"prompt_count": prompt_count, "updated_at": time.time()} tmp_path = f"{self._prewarm_marker_path(key)}.tmp" with open(tmp_path, "w", encoding="utf-8") as f: json.dump(payload, f) os.replace(tmp_path, self._prewarm_marker_path(key)) def _load_index(self) -> None: if self._index_loaded: return index_path = self._index_path() if os.path.exists(index_path): try: with open(index_path, "r", encoding="utf-8") as f: self._index = json.load(f) except json.JSONDecodeError: self._index = {} self._index_loaded = True def _save_index(self) -> None: os.makedirs(self._model_dir(), exist_ok=True) tmp_path = f"{self._index_path()}.tmp" with open(tmp_path, "w", encoding="utf-8") as f: json.dump(self._index, f) os.replace(tmp_path, self._index_path()) def _make_key(self, text: str) -> str: key_src = f"{self.model_name}|{self.encoder_id}|{text}" return hashlib.sha256(key_src.encode("utf-8")).hexdigest() def _entry_path(self, key: str) -> str: return os.path.join(self._model_dir(), f"{key}.npy") def _mem_get(self, key: str) -> Optional[np.ndarray]: if key in self._mem_cache: self._mem_cache.move_to_end(key) return self._mem_cache[key] return None def _mem_put(self, key: str, value: np.ndarray) -> None: self._mem_cache[key] = value self._mem_cache.move_to_end(key) while len(self._mem_cache) > self.max_mem_entries: self._mem_cache.popitem(last=False) def _disk_load(self, key: str) -> Optional[np.ndarray]: path = self._entry_path(key) if not os.path.exists(path): return None try: return np.load(path) except Exception: return None def _disk_save(self, key: str, value: np.ndarray) -> None: os.makedirs(self._model_dir(), exist_ok=True) np.save(self._entry_path(key), value) self._index[key] = { "length": int(value.shape[0]), "dtype": str(value.dtype), "updated_at": time.time(), } def _maybe_use_session_cache(self, texts: list[str]): session = _ACTIVE_SESSION.get() if session is None: return None if 
session.last_prompt_texts == texts and session.last_prompt_embeddings is not None: return session.last_prompt_embeddings, session.last_prompt_lengths return None def _update_session_cache(self, texts: list[str], tensor: torch.Tensor, lengths: list[int]) -> None: session = _ACTIVE_SESSION.get() if session is None: return session.last_prompt_texts = texts session.last_prompt_embeddings = tensor session.last_prompt_lengths = lengths def get_or_encode(self, texts: Iterable[str], encoder): if isinstance(texts, str): texts = [texts] texts = sanitize_texts(list(texts)) if len(texts) == 0: empty = torch.empty(0) return empty, [] session_cache = self._maybe_use_session_cache(texts) if session_cache is not None: return session_cache arrays: list[Optional[np.ndarray]] = [None] * len(texts) lengths: list[int] = [0] * len(texts) misses: list[tuple[int, str, str]] = [] with self._lock: self._load_index() for idx, text in enumerate(texts): key = self._make_key(text) cached = self._mem_get(key) if cached is not None: arrays[idx] = cached lengths[idx] = cached.shape[0] self.stats.hits += 1 continue cached = self._disk_load(key) if cached is not None: arrays[idx] = cached lengths[idx] = cached.shape[0] self._mem_put(key, cached) self.stats.disk_hits += 1 continue misses.append((idx, text, key)) self.stats.misses += 1 if misses: miss_texts = [text for _, text, _ in misses] miss_tensor, miss_lengths = encoder(miss_texts) miss_tensor = miss_tensor.detach().cpu() miss_tensor_np = miss_tensor.numpy() with self._lock: self._load_index() for miss_idx, length in enumerate(miss_lengths): idx, _text, key = misses[miss_idx] arr = miss_tensor_np[miss_idx, :length].copy() arrays[idx] = arr lengths[idx] = int(length) self._mem_put(key, arr) self._disk_save(key, arr) self._save_index() max_len = max(lengths) if lengths else 0 feat_dim = arrays[0].shape[-1] if arrays[0] is not None else 0 dtype = arrays[0].dtype if arrays[0] is not None else np.float32 padded = np.zeros((len(texts), max_len, feat_dim), dtype=dtype) for idx, arr in enumerate(arrays): if arr is None: continue padded[idx, : arr.shape[0]] = arr result = torch.from_numpy(padded) self._update_session_cache(texts, result, lengths) return result, lengths class CachedTextEncoder: """Wrapper around a text encoder to add disk-backed caching.""" def __init__(self, encoder, *, model_name: str, base_dir: Optional[str] = None): self.encoder = encoder self.model_name = model_name encoder_id = f"{type(encoder).__name__}" self.cache = EmbeddingCache(model_name=model_name, encoder_id=encoder_id, base_dir=base_dir) def __call__(self, texts): return self.cache.get_or_encode(texts, self.encoder) def prewarm(self, texts) -> None: if isinstance(texts, str): texts = [texts] texts = sanitize_texts(list(texts)) prewarm_key = hashlib.sha256("|".join(texts).encode("utf-8")).hexdigest() if self.cache.has_prewarm_marker(prewarm_key): return self.cache.get_or_encode(texts, self.encoder) self.cache.write_prewarm_marker(prewarm_key, prompt_count=len(texts)) def to(self, device=None, dtype=None): if hasattr(self.encoder, "to"): self.encoder.to(device=device, dtype=dtype) return self @contextlib.contextmanager def session_context(self, session): token = _ACTIVE_SESSION.set(session) try: yield finally: _ACTIVE_SESSION.reset(token) def __getattr__(self, name): return getattr(self.encoder, name) ================================================ FILE: kimodo/demo/generation.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION &
AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from collections import defaultdict from typing import Optional import numpy as np import torch import viser from kimodo.constraints import ( TYPE_TO_CLASS, FullBodyConstraintSet, Root2DConstraintSet, ) from kimodo.exports.mujoco import apply_g1_real_robot_projection from kimodo.skeleton import G1Skeleton34, SOMASkeleton30 from kimodo.tools import seed_everything from .embedding_cache import CachedTextEncoder from .state import ClientSession, ModelBundle def compute_model_constraints_lst( session: ClientSession, model_bundle: ModelBundle, num_frames: int, device: str, ): """Compute the lst of constraints for the model based on the constraints in viser.""" assert len(session.motions) == 1, "Only one motion allowed for constrained generation" if not session.constraints: return [] model_skeleton = model_bundle.model.skeleton # For SOMA, UI uses somaskel77; extract 30-joint subset for the model use_skel_slice = isinstance(model_skeleton, SOMASkeleton30) and session.skeleton.nbjoints != model_skeleton.nbjoints skel_slice = model_skeleton.get_skel_slice(session.skeleton) if use_skel_slice else None dense_smooth_root_pos_2d = None if session.constraints["2D Root"].dense_path: # get the full 2d root dense_smooth_root_pos_2d = session.constraints["2D Root"].get_constraint_info(device=device)["root_pos"][ :, [0, 2] ] model_constraints = [] for track_name, constraint in session.constraints.items(): constraint_info = constraint.get_constraint_info(device=device) frame_idx = constraint_info["frame_idx"] # drop any constraints outside the generation range valid_info = [(i, fi) for i, fi in enumerate(frame_idx) if fi < num_frames] valid_idx = [i for i, _ in valid_info] valid_frame_idx = [fi for _, fi in valid_info] if len(valid_frame_idx) == 0: continue frame_indices = torch.tensor(valid_frame_idx) if track_name == "2D Root": smooth_root_pos_2d = constraint_info["root_pos"][valid_idx][:, [0, 2]].to(device) # same as "smooth_root_2d" model_constraints.append( Root2DConstraintSet( model_skeleton, frame_indices, smooth_root_pos_2d, ) ) elif track_name == "Full-Body": constraint_joints_pos = constraint_info["joints_pos"][valid_idx].to(device) constraint_joints_rot = constraint_info["joints_rot"][valid_idx].to(device) if skel_slice is not None: constraint_joints_pos = constraint_joints_pos[:, skel_slice] constraint_joints_rot = constraint_joints_rot[:, skel_slice] smooth_root_pos_2d = None if dense_smooth_root_pos_2d is not None: smooth_root_pos_2d = dense_smooth_root_pos_2d[frame_indices] model_constraints.append( FullBodyConstraintSet( model_skeleton, frame_indices, constraint_joints_pos, constraint_joints_rot, smooth_root_2d=smooth_root_pos_2d, ) ) elif track_name == "End-Effectors": constraint_joints_pos = constraint_info["joints_pos"][valid_idx].to(device) constraint_joints_rot = constraint_info["joints_rot"][valid_idx].to(device) if skel_slice is not None: constraint_joints_pos = constraint_joints_pos[:, skel_slice] constraint_joints_rot = constraint_joints_rot[:, skel_slice] end_effector_type_set_lst = [ end_effector_type_set for i, end_effector_type_set in enumerate(constraint_info["end_effector_type"]) if i in valid_idx ] # regroup the end effector data by type cls_idx = defaultdict(list) for idx, end_effector_type_set in enumerate(end_effector_type_set_lst): for end_effector_type in end_effector_type_set: cls_idx[TYPE_TO_CLASS[end_effector_type]].append(idx) for cls, lst_idx in cls_idx.items(): frame_indices_cls = 
frame_indices[lst_idx] smooth_root_pos_2d = None if dense_smooth_root_pos_2d is not None: smooth_root_pos_2d = dense_smooth_root_pos_2d[frame_indices_cls] constraint_joints_pos_el = constraint_joints_pos[lst_idx] constraint_joints_rot_el = constraint_joints_rot[lst_idx] model_constraints.append( cls( model_skeleton, frame_indices_cls, constraint_joints_pos_el, constraint_joints_rot_el, smooth_root_2d=smooth_root_pos_2d, ) ) else: raise ValueError(f"Unsupported constraint type: {constraint.display_name}") return model_constraints def generate( *, client: viser.ClientHandle, session: ClientSession, model_bundle: ModelBundle, prompts: list[str], num_frames: list[int], num_samples: int, seed: int, diffusion_steps: int, cfg_weight: Optional[list[float]] = None, cfg_type: Optional[str] = None, postprocess_parameters: Optional[dict] = None, transitions_parameters: Optional[dict] = None, real_robot_rotations: bool = False, device: str, clear_motions, add_character_motion, ) -> None: client_id = client.client_id print( f"Generating {num_samples} samples for a total of {sum(num_frames)} frames with those prompt: {prompts} (client {client_id})" ) seed_everything(seed) model_constraints = compute_model_constraints_lst(session, model_bundle, sum(num_frames), device) cfg_weight = cfg_weight or [2.0, 2.0] postprocess_parameters = postprocess_parameters or {} transitions_parameters = transitions_parameters or {} encoder = getattr(model_bundle.model, "text_encoder", None) if isinstance(encoder, CachedTextEncoder): with encoder.session_context(session): pred_joints_output = model_bundle.model( prompts, num_frames, diffusion_steps, multi_prompt=True, constraint_lst=model_constraints, cfg_weight=cfg_weight, num_samples=num_samples, cfg_type=cfg_type, **(postprocess_parameters | transitions_parameters), ) # [B, T, motion_rep_dim] else: pred_joints_output = model_bundle.model( prompts, num_frames, diffusion_steps, multi_prompt=True, constraint_lst=model_constraints, cfg_weight=cfg_weight, num_samples=num_samples, cfg_type=cfg_type, **(postprocess_parameters | transitions_parameters), ) # [B, T, motion_rep_dim] joints_pos = pred_joints_output["posed_joints"] # [B, T, J, 3] joints_rot = pred_joints_output["global_rot_mats"] foot_contacts = pred_joints_output.get("foot_contacts") # Optionally project G1 to real robot DoF (1-DoF per joint, clamped) for display. if real_robot_rotations and isinstance(session.skeleton, G1Skeleton34): joints_pos, joints_rot = apply_g1_real_robot_projection( session.skeleton, pred_joints_output["posed_joints"], pred_joints_output["global_rot_mats"], clamp_to_limits=True, ) # Display on characters (callbacks keep this module UI-agnostic). clear_motions(client_id) # Keep one sample centered at the origin so constraints align. spread_factor = 1.0 # meters center_idx = num_samples // 2 x_trans = (np.arange(num_samples) - center_idx) * spread_factor for i in range(num_samples): cur_joints_pos = joints_pos[i] cur_joints_pos[..., 0] += x_trans[i] add_character_motion( client, session.skeleton, cur_joints_pos, joints_rot[i], foot_contacts[i], ) ================================================ FILE: kimodo/demo/queue_manager.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """HF mode user queue and session time limit.""" import math import threading import time from collections.abc import Callable from typing import Any import viser from .config import DEMO_UI_QUICK_START_MODAL_MD, MAX_SESSION_MINUTES # Link for "Duplicate this Space" on Hugging Face (used in queue and expiry modals). DUPLICATE_SPACE_URL = "https://huggingface.co/spaces/nvidia/Kimodo?duplicate=true" GITHUB_REPO_URL = "https://github.com/nv-tlabs/kimodo" # How often to refresh queue modal content (position, total, estimated wait). QUEUE_MODAL_REFRESH_INTERVAL_SEC = 15 class UserQueue: """Thread-safe queue: active users (with activation timestamp) and waiting queue.""" def __init__(self, max_active: int, max_minutes: float) -> None: self._max_active = max_active self._max_minutes = max_minutes self._max_seconds = max_minutes * 60.0 self._active: dict[int, float] = {} # client_id -> activation timestamp self._queued: list[int] = [] self._lock = threading.Lock() def try_activate(self, client_id: int) -> bool: """If a slot is free, add client as active and return True. Else return False. """ with self._lock: if len(self._active) < self._max_active: self._active[client_id] = time.time() return True return False def enqueue(self, client_id: int) -> None: with self._lock: if client_id not in self._queued: self._queued.append(client_id) def remove(self, client_id: int) -> bool: """Remove from active or queue. Returns True if was active. """ with self._lock: was_active = client_id in self._active self._active.pop(client_id, None) if client_id in self._queued: self._queued.remove(client_id) return was_active def promote_next(self) -> int | None: """If queue non-empty, pop first, activate them, return their client_id. Else None. 
""" with self._lock: if not self._queued: return None client_id = self._queued.pop(0) self._active[client_id] = time.time() return client_id def get_queue_position(self, client_id: int) -> tuple[int, int] | None: """(1-based position, total_in_queue) or None if not queued.""" with self._lock: if client_id not in self._queued: return None pos = self._queued.index(client_id) return (pos + 1, len(self._queued)) def get_estimated_wait_seconds(self, client_id: int) -> float: """Estimated seconds until this queued client gets a slot.""" with self._lock: if client_id not in self._queued: return 0.0 pos = self._queued.index(client_id) + 1 # 1-based # Expiry times of active users (when they free a slot) now = time.time() expiries = sorted(now + self._max_seconds - (now - t) for t in self._active.values()) if not expiries: return 0.0 # Nth slot to free (1-indexed) wraps over expiries idx = (pos - 1) % len(expiries) cycles = (pos - 1) // len(expiries) slot_free_time = expiries[idx] + cycles * self._max_seconds return max(0.0, slot_free_time - now) def is_active(self, client_id: int) -> bool: with self._lock: return client_id in self._active def was_active(self, client_id: int) -> bool: """True if client is currently active (for use when already holding lock).""" return client_id in self._active def _format_wait(seconds: float) -> str: if seconds < 60: return "less than a minute" mins = int(math.ceil(seconds / 60)) return f"~{mins} minute{'s' if mins != 1 else ''}" def _queue_modal_markdown(position: int, total: int, estimated_wait_sec: float) -> str: wait_str = _format_wait(estimated_wait_sec) mins = int(MAX_SESSION_MINUTES) if MAX_SESSION_MINUTES == int(MAX_SESSION_MINUTES) else MAX_SESSION_MINUTES return f"""## Kimodo Demo — Please Wait This demo runs with limited capacity. Each user gets **{mins} minute{"s" if mins != 1 else ""}** of interactive time. **Your position in queue:** {position} / {total} **Estimated wait:** {wait_str} Please keep this tab open — the demo will start automatically when it's your turn. --- *Want unlimited access? [Duplicate this Space]({DUPLICATE_SPACE_URL}) or clone the [GitHub repo]({GITHUB_REPO_URL}) to run locally!* """ def _welcome_modal_markdown() -> str: mins = int(MAX_SESSION_MINUTES) if MAX_SESSION_MINUTES == int(MAX_SESSION_MINUTES) else MAX_SESSION_MINUTES return f"""## Welcome to Kimodo Demo You have been granted a **{mins}-minute** demo session. Your session timer has started. Click the button below to begin! """ def _expiry_modal_markdown() -> str: mins = int(MAX_SESSION_MINUTES) if MAX_SESSION_MINUTES == int(MAX_SESSION_MINUTES) else MAX_SESSION_MINUTES return f"""## Session Expired Your {mins}-minute demo session has ended. Thank you for trying Kimodo! Refresh this page to rejoin the queue, or [duplicate this Space]({DUPLICATE_SPACE_URL}) for unlimited access. 
""" class QueueManager: """Orchestrates HF mode: queue modals, welcome modal, session timer, promotion.""" def __init__( self, queue: UserQueue, server: viser.ViserServer, setup_demo_for_client: Callable[[viser.ClientHandle], None], cleanup_session: Callable[[int], None], ) -> None: self._queue = queue self._server = server self._setup_demo_for_client = setup_demo_for_client self._cleanup_session = cleanup_session self._max_seconds = queue._max_seconds self._queue_modal_handles: dict[int, tuple[Any, Any]] = {} self._welcome_modal_handles: dict[int, Any] = {} self._expiry_timers: dict[int, threading.Timer] = {} self._lock = threading.Lock() self._refresh_stop = threading.Event() self._refresh_thread = threading.Thread( target=self._queue_modal_refresh_loop, name="queue-modal-refresh", daemon=True, ) self._refresh_thread.start() def _queue_modal_refresh_loop(self) -> None: """Periodically refresh queue modals so position, total, and estimated wait stay current.""" while not self._refresh_stop.wait(timeout=QUEUE_MODAL_REFRESH_INTERVAL_SEC): self._update_all_queue_modals() def on_client_connect(self, client: viser.ClientHandle) -> None: """Handle new connection: activate if slot free, else enqueue and show queue modal.""" client_id = client.client_id if self._queue.try_activate(client_id): try: self._setup_demo_for_client(client) except RuntimeError as e: if "CUDA error" in str(e): print(f"CUDA error while setting up client {client_id}: {e}") return raise self._start_session_timer(client_id) self._show_welcome_modal(client) else: self._queue.enqueue(client_id) self._show_queue_modal(client) self._update_all_queue_modals() def on_client_disconnect(self, client_id: int) -> None: """Remove from queue/active, cancel timer, promote next if was active. Session/scene cleanup is done by the demo's on_client_disconnect. 
""" with self._lock: self._expiry_timers.pop(client_id, None) self._queue_modal_handles.pop(client_id, None) self._welcome_modal_handles.pop(client_id, None) was_active = self._queue.remove(client_id) if was_active: self._promote_next_user() else: self._update_all_queue_modals() def _show_queue_modal(self, client: viser.ClientHandle) -> None: client_id = client.client_id pos, total = self._queue.get_queue_position(client_id) or (0, 0) wait_sec = self._queue.get_estimated_wait_seconds(client_id) md_content = _queue_modal_markdown(pos, total, wait_sec) modal = client.gui.add_modal( "Kimodo Demo — Please Wait", size="xl", show_close_button=False, ) with modal: md_handle = client.gui.add_markdown(md_content) with self._lock: self._queue_modal_handles[client_id] = (modal, md_handle) def _show_quick_start_modal(self, client: viser.ClientHandle) -> None: """Show the quick start instructions modal (same as non-HF mode).""" with client.gui.add_modal( "Welcome — Quick Start", size="xl", show_close_button=True, save_choice="kimodo.demo.quick_start_ack", ) as quick_start_modal: client.gui.add_markdown(DEMO_UI_QUICK_START_MODAL_MD) client.gui.add_button("Got it (don't remind me again)").on_click(lambda _: quick_start_modal.close()) def _show_welcome_modal(self, client: viser.ClientHandle) -> None: client_id = client.client_id def _on_start_demo(_: Any) -> None: modal.close() self._show_quick_start_modal(client) modal = client.gui.add_modal( "Welcome to Kimodo Demo", size="xl", show_close_button=True, ) with modal: client.gui.add_markdown(_welcome_modal_markdown()) client.gui.add_button("Start Demo").on_click(_on_start_demo) with self._lock: self._welcome_modal_handles[client_id] = modal def _update_all_queue_modals(self) -> None: with self._lock: handles = list(self._queue_modal_handles.items()) for client_id, (modal, md_handle) in handles: pos_total = self._queue.get_queue_position(client_id) if pos_total is None: continue pos, total = pos_total wait_sec = self._queue.get_estimated_wait_seconds(client_id) try: md_handle.content = _queue_modal_markdown(pos, total, wait_sec) except Exception: pass def _promote_next_user(self) -> None: promoted_id = self._queue.promote_next() if promoted_id is None: return clients = self._server.get_clients() client = clients.get(promoted_id) if client is None: return with self._lock: old = self._queue_modal_handles.pop(promoted_id, None) if old is not None: try: old[0].close() except Exception: pass try: self._setup_demo_for_client(client) except RuntimeError as e: if "CUDA error" in str(e): print(f"CUDA error while setting up client {promoted_id}: {e}") return raise self._start_session_timer(promoted_id) self._show_welcome_modal(client) self._update_all_queue_modals() def _start_session_timer(self, client_id: int) -> None: def on_expiry() -> None: self._on_session_expired(client_id) t = threading.Timer(self._max_seconds, on_expiry) t.daemon = True with self._lock: self._expiry_timers[client_id] = t t.start() def _on_session_expired(self, client_id: int) -> None: with self._lock: self._expiry_timers.pop(client_id, None) if not self._queue.is_active(client_id): return self._queue.remove(client_id) clients = self._server.get_clients() client = clients.get(client_id) if client is not None: try: with client.gui.add_modal( "Session Expired", size="lg", show_close_button=False, ) as modal_ctx: client.gui.add_markdown(_expiry_modal_markdown()) except Exception: pass self._cleanup_session(client_id) self._promote_next_user() ================================================ 
FILE: kimodo/demo/state.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from dataclasses import dataclass, field from typing import Optional import torch import kimodo.viz.viser_utils as viser_utils import viser from kimodo.skeleton import SkeletonBase from kimodo.viz.viser_utils import GuiElements from .config import ( DEFAULT_CUR_DURATION, DEFAULT_MODEL, DEFAULT_PLAYBACK_SPEED, ) @dataclass(frozen=True) class ModelBundle: model: object motion_rep: object skeleton: SkeletonBase model_fps: float @dataclass class ClientSession: """Per-client session data.""" client: viser.ClientHandle gui_elements: GuiElements motions: dict # character_name -> CharacterMotion constraints: dict[str, viser_utils.ConstraintSet] = field(default_factory=dict) timeline_data: object = None frame_idx: int = 0 playing: bool = False playback_speed: float = DEFAULT_PLAYBACK_SPEED cur_duration: float = DEFAULT_CUR_DURATION max_frame_idx: int = 100 # will be updated based on model_fps updating_motions: bool = False edit_mode: bool = False model_name: str = DEFAULT_MODEL model_fps: float = 0.0 skeleton: SkeletonBase | None = None motion_rep: object | None = None examples_base_dir: str = "" example_dict: dict[str, str] = field(default_factory=dict) gui_examples_dropdown: Optional[viser.GuiInputHandle] = None gui_save_example_path_text: Optional[viser.GuiInputHandle] = None gui_model_selector: Optional[viser.GuiInputHandle] = None last_prompt_texts: Optional[list[str]] = None last_prompt_embeddings: Optional[torch.Tensor] = None last_prompt_lengths: Optional[list[int]] = None edit_mode_snapshot: Optional[dict[int, dict[str, object]]] = None undo_drag_snapshot: Optional[dict[str, object]] = None show_only_current_constraint: bool = False # False = Show All, True = Show only Current ================================================ FILE: kimodo/demo/ui.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # ruff: noqa: I001 import math import os import threading from typing import Optional from kimodo.constraints import load_constraints_lst, save_constraints_lst from kimodo.exports.bvh import motion_to_bvh_bytes, save_motion_bvh from kimodo.exports.motion_io import ( amass_npz_to_bytes, g1_csv_to_bytes, kimodo_npz_to_bytes, load_motion_file, save_kimodo_npz, ) from kimodo.model.registry import kimodo_short_key_for_skeleton_dataset, registry_skeleton_for_joint_count from kimodo.tools import to_torch from kimodo.viz import viser_utils from kimodo.viz.viser_utils import GuiElements import numpy as np import torch import viser from viser._timeline_api import PROMPT_COLORS from . 
import generation from .config import ( DEFAULT_CUR_DURATION, DEMO_UI_INSTRUCTIONS_TAB_MD, get_datasets, get_model_info, get_models_for_dataset_skeleton, get_skeleton_display_name, get_skeleton_display_names_for_dataset, get_skeleton_key_from_display_name, get_short_key_from_display_name, HF_MODE, INIT_POSTPROCESSING, MODEL_NAMES, NB_TRANSITION_FRAMES, SHOW_TRANSITION_PARAMS, ) from .state import ClientSession from kimodo.skeleton import G1Skeleton34, SOMASkeleton30, SOMASkeleton77 def extract_intervals_and_singles(t: torch.Tensor): intervals = [] intervals_indices = [] single_frames = [] single_frames_indices = [] start_idx = 0 for i in range(1, len(t) + 1): # End of run if: # - end of tensor # - non-consecutive value if i == len(t) or t[i] != t[i - 1] + 1: run_length = i - start_idx if run_length >= 2: intervals.append((int(t[start_idx]), int(t[i - 1]))) intervals_indices.append((start_idx, i - 1)) else: single_frames.append(int(t[start_idx])) single_frames_indices.append(start_idx) start_idx = i return intervals, intervals_indices, single_frames, single_frames_indices def create_gui( demo, client: viser.ClientHandle, model_name: str, model_fps: float, ): """Create GUI elements for a specific client.""" client_id = client.client_id def get_active_session(event_client: viser.ClientHandle | None): if event_client is None: return None if not demo.client_active(event_client.client_id): return None return demo.client_sessions[event_client.client_id] def build_timeline_tracks(): timeline = client.timeline demo.set_timeline_defaults(timeline, model_fps) timeline.set_visible(True) timeline.set_current_frame(0) timeline_tracks = {} fullbody_id = timeline.add_track( "Full-Body", track_type="keyframe", color=(219, 148, 86), height_scale=0.5, ) timeline_tracks[fullbody_id] = { "name": "Full-Body", "track_type": "keyframe", "color": (219, 148, 86), "height_scale": 0.5, } root2d_id = timeline.add_track( "2D Root", track_type="keyframe", color=(150, 100, 200), height_scale=0.5, ) timeline_tracks[root2d_id] = { "name": "2D Root", "track_type": "keyframe", "color": (150, 100, 200), "height_scale": 0.5, } lefthand_id = timeline.add_track( "Left Hand", track_type="keyframe", color=(100, 200, 150), height_scale=0.5, ) timeline_tracks[lefthand_id] = { "name": "Left Hand", "track_type": "keyframe", "color": (100, 200, 150), "height_scale": 0.5, } righthand_id = timeline.add_track( "Right Hand", track_type="keyframe", color=(200, 100, 150), height_scale=0.5, ) timeline_tracks[righthand_id] = { "name": "Right Hand", "track_type": "keyframe", "color": (200, 100, 150), "height_scale": 0.5, } leftfoot_id = timeline.add_track( "Left Foot", track_type="keyframe", color=(219, 148, 86), height_scale=0.5, ) timeline_tracks[leftfoot_id] = { "name": "Left Foot", "track_type": "keyframe", "color": (219, 148, 86), "height_scale": 0.5, } rightfoot_id = timeline.add_track( "Right Foot", track_type="keyframe", color=(150, 100, 200), height_scale=0.5, ) timeline_tracks[rightfoot_id] = { "name": "Right Foot", "track_type": "keyframe", "color": (150, 100, 200), "height_scale": 0.5, } return timeline, timeline_tracks timeline, timeline_tracks = build_timeline_tracks() # These handles are part of GuiElements, but the demo currently uses timeline + buttons # embedded in the Viser UI instead of custom controls. 
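# For reference, extract_intervals_and_singles (defined above) splits a sorted frame
# tensor into consecutive runs and isolated frames, e.g. torch.tensor([2, 3, 4, 9]) ->
# intervals=[(2, 4)], intervals_indices=[(0, 2)], single_frames=[9], single_frames_indices=[3].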
gui_play_pause_button = None gui_next_frame_button = None gui_prev_frame_button = None gui_timeline = None gui_duration_slider = None # now other gui elements tab_group = client.gui.add_tab_group() # # Playback and Motion generation controls # with tab_group.add_tab("Generate", viser.Icon.WALK): with client.gui.add_folder("Model Selection", expand_by_default=True): info = get_model_info(model_name) if info is None: info = get_model_info(next(iter(MODEL_NAMES))) def get_allowed_skeleton_labels(dataset_ui_label: str) -> list[str]: labels = get_skeleton_display_names_for_dataset(dataset_ui_label, family="Kimodo") if HF_MODE: labels = [label for label in labels if get_skeleton_key_from_display_name(label) != "SMPLX"] return labels dataset_ui_label = "Rigplay" if HF_MODE else info.dataset_ui_label datasets = ["Rigplay"] if HF_MODE else get_datasets(family="Kimodo") skeleton_labels = get_allowed_skeleton_labels(dataset_ui_label) initial_skeleton_label = get_skeleton_display_name(info.skeleton) if initial_skeleton_label not in skeleton_labels and skeleton_labels: initial_skeleton_label = skeleton_labels[0] initial_skeleton_key = ( get_skeleton_key_from_display_name(initial_skeleton_label) if skeleton_labels else None ) models_for_pair = ( get_models_for_dataset_skeleton(dataset_ui_label, initial_skeleton_key, family="Kimodo") if initial_skeleton_key is not None else [] ) version_options = [m.display_name for m in models_for_pair] initial_version = ( info.display_name if info.display_name in version_options else (version_options[0] if version_options else "") ) gui_dataset_selector = client.gui.add_dropdown( "Training dataset", options=datasets, initial_value=dataset_ui_label, visible=not HF_MODE, ) gui_skeleton_selector = client.gui.add_dropdown( "Model" if HF_MODE else "Skeleton", options=skeleton_labels, initial_value=initial_skeleton_label, ) gui_version_selector = client.gui.add_dropdown( "Version", options=version_options, initial_value=initial_version, ) gui_version_selector.visible = len(models_for_pair) > 1 gui_model_display = client.gui.add_markdown( content=f"**Model:** {initial_version}", ) gui_load_model_button = client.gui.add_button( "Load model", hint="Load the selected model (dataset, skeleton, version).", ) class ModelSelectorHandle: """Wrapper so session and callbacks can treat three dropdowns as one.""" def __init__(self): self._dataset = gui_dataset_selector self._skeleton = gui_skeleton_selector self._version = gui_version_selector self._display = gui_model_display @property def value(self) -> str: return get_short_key_from_display_name(self._version.value) or "" def set_from_short_key(self, short_key: str) -> None: info = get_model_info(short_key) if info is None: return dataset_ui_label = "Rigplay" if HF_MODE else info.dataset_ui_label self._dataset.value = dataset_ui_label self._skeleton.options = get_allowed_skeleton_labels(dataset_ui_label) skeleton_label = get_skeleton_display_name(info.skeleton) if skeleton_label not in self._skeleton.options and self._skeleton.options: skeleton_label = self._skeleton.options[0] self._skeleton.value = skeleton_label skeleton_key = get_skeleton_key_from_display_name(skeleton_label) if skeleton_key is None: return models = get_models_for_dataset_skeleton(dataset_ui_label, skeleton_key, family="Kimodo") self._version.options = [m.display_name for m in models] self._version.value = ( info.display_name if info.display_name in self._version.options else self._version.options[0] ) self._version.visible = len(models) > 1 self._display.content 
= f"**Model:** {self._version.value}" gui_model_selector = ModelSelectorHandle() with client.gui.add_folder("Examples", expand_by_default=True): examples_base_dir = demo.get_examples_base_dir(model_name, absolute=True) example_dict = viser_utils.load_example_cases(examples_base_dir) example_names = list(example_dict.keys()) if not example_names: example_names = [""] gui_examples_dropdown = client.gui.add_dropdown( "Example", options=example_names, initial_value=example_names[0], ) gui_load_example_button = client.gui.add_button( "Load Example", hint="Load the selected example.", disabled=not example_dict, ) def update_examples_dropdown( new_example_dict: dict[str, str], keep_selection: bool = True, ) -> None: if not new_example_dict: gui_examples_dropdown.options = [""] gui_examples_dropdown.value = "" gui_load_example_button.disabled = True return gui_load_example_button.disabled = False example_names_local = list(new_example_dict.keys()) gui_examples_dropdown.options = example_names_local if keep_selection and gui_examples_dropdown.value in example_names_local: return gui_examples_dropdown.value = example_names_local[0] with client.gui.add_folder("Generate", expand_by_default=True): gui_duration = client.gui.add_markdown(content=f"Total duration: {DEFAULT_CUR_DURATION:.1f} (sec)") def update_duration_gui(duration): gui_duration.content = f"Total duration: {duration:.1f} (sec)" def compute_prompt_num_frames(prompt_values): """Convert timeline prompt bounds to per-prompt frame counts. Convention in this demo: - All prompts except the last are treated as [start_frame, end_frame) (end is exclusive). - The last prompt is treated as [start_frame, end_frame] (end is inclusive). - This assumes the prompts values are sorted by start_frame. """ if len(prompt_values) == 0: return [] num_frames = [] for i, x in enumerate(prompt_values): cur = x.end_frame - x.start_frame if i == len(prompt_values) - 1: cur += 1 num_frames.append(cur) return num_frames def update_duration_auto(): session = demo.client_sessions[client_id] prompt_values = sorted( [x for x in timeline._prompts.values()], key=lambda x: x.start_frame, ) num_frames = compute_prompt_num_frames(prompt_values) total_nb_frames = sum(num_frames) cur_duration = total_nb_frames / session.model_fps set_new_duration(client_id, cur_duration) update_duration_gui(cur_duration) gui_num_samples_slider = client.gui.add_slider( "Num Samples", min=1, max=10, step=1, initial_value=1, visible=not HF_MODE, ) gui_use_soma_layer_checkbox = client.gui.add_checkbox( "SOMA layer", initial_value=False, visible="soma" in (model_name or ""), ) with client.gui.add_folder("Model Parameters", expand_by_default=False): gui_seed = client.gui.add_number("Seed", initial_value=42) with client.gui.add_folder("Diffusion", expand_by_default=False): gui_diffusion_steps_slider = client.gui.add_slider( "Denoising Steps", min=2, max=1000, step=10, initial_value=100, ) with client.gui.add_folder("Classifier-Free Guidance", expand_by_default=False): gui_cfg_checkbox = client.gui.add_checkbox( "Enable", initial_value=True, visible=True, ) gui_cfg_text_weight_slider = client.gui.add_slider( "Text Weight", min=0.0, max=5.0, step=0.1, initial_value=2.0, visible=True, ) gui_cfg_constraint_weight_slider = client.gui.add_slider( "Constraint Weight", min=0.0, max=5.0, step=0.1, initial_value=2.0, visible=True, ) with client.gui.add_folder( "Transitions", expand_by_default=False, visible=SHOW_TRANSITION_PARAMS, ): gui_num_transition_frames_slider = client.gui.add_slider( "Transition frames", 
min=1, max=10, step=1, initial_value=NB_TRANSITION_FRAMES, visible=True, ) with client.gui.add_folder("Post Processing", expand_by_default=False): _model_name = model_name or "" _postprocess_visible = "g1" not in _model_name gui_postprocess_checkbox = client.gui.add_checkbox( "Enable", initial_value=INIT_POSTPROCESSING, hint="Apply motion post-processing (not available for G1)", visible=_postprocess_visible, ) gui_root_margin = client.gui.add_number( "Root Margin", min=0.0, # max=0.5, step=0.01, initial_value=0.04, hint="Margin for root position (meters). Lower values pin root closer to target.", visible=INIT_POSTPROCESSING and _postprocess_visible, ) @gui_postprocess_checkbox.on_update def _(event: viser.GuiEvent) -> None: if get_active_session(event.client) is None: return # disable the slider if sharing transition is False gui_root_margin.visible = gui_postprocess_checkbox.value gui_real_robot_rotations_checkbox = client.gui.add_checkbox( "Real robot rotations", initial_value=False, hint="Project joint rotations to G1 real robot DoF (1-DoF per joint) and clamp to axis limits from the MuJoCo XML.", visible="g1" in _model_name, ) gui_generate_button = client.gui.add_button("Generate", color="green") with client.gui.add_folder("Constraints", expand_by_default=False): gui_gizmo_space_dropdown = client.gui.add_dropdown( "Gizmo space", ("Local", "World"), initial_value="Local", visible="g1" not in _model_name, ) gui_edit_constraint_button = client.gui.add_button("Enter Editing Mode") gui_snap_to_constraint_button = client.gui.add_button( "Snap to Constraint", disabled=True, ) gui_reset_constraint_button = client.gui.add_button( "Reset Constraint", disabled=True, ) gui_undo_drag_button = client.gui.add_button( "Undo Move", disabled=True, ) with client.gui.add_folder("Root 2D Options", expand_by_default=True): gui_dense_path_checkbox = client.gui.add_checkbox( "Make Smooth Path", initial_value=False, visible=True, ) gui_show_only_current_constraint_checkbox = client.gui.add_checkbox( "Show only Current", initial_value=False, hint="Show only constraint overlays at the current frame; uncheck to show all.", ) def apply_constraint_overlay_visibility(session: ClientSession) -> None: demo._apply_constraint_overlay_visibility(session) @gui_show_only_current_constraint_checkbox.on_update def _(event: viser.GuiEvent) -> None: session = get_active_session(event.client) if session is None: return session.show_only_current_constraint = gui_show_only_current_constraint_checkbox.value apply_constraint_overlay_visibility(session) gui_clear_all_constraints_button = client.gui.add_button( "Clear All Constraints", color="red", ) def has_constraint_at_frame(session: ClientSession, frame_idx: int) -> bool: for constraint_name in ["Full-Body", "End-Effectors", "2D Root"]: constraint = session.constraints.get(constraint_name) if constraint is None: continue if frame_idx in constraint.keyframes: return True return False def update_snap_to_constraint_button(session: ClientSession) -> None: gui_snap_to_constraint_button.disabled = not has_constraint_at_frame(session, session.frame_idx) def ensure_edit_snapshot(session: ClientSession, motion, frame_idx: int) -> None: if session.edit_mode_snapshot is None: session.edit_mode_snapshot = {} if frame_idx in session.edit_mode_snapshot: return session.edit_mode_snapshot[frame_idx] = { "joints_pos": motion.get_joints_pos(frame_idx), "joints_rot": motion.get_joints_rot(frame_idx), } def _update_dense_path(motion, session): constraint_info = session.constraints["2D 
Root"].get_constraint_info() if len(constraint_info["frame_idx"]) > 0: min_root_frame = min(constraint_info["frame_idx"]) max_root_frame = max(constraint_info["frame_idx"]) motion.set_projected_root_pos_path( constraint_info["root_pos"][:, [0, 2]], min_frame_idx=min_root_frame, max_frame_idx=max_root_frame, ) # Delay (ms) after last keyframe/interval move before updating path = "on release". DENSE_PATH_AFTER_RELEASE_MS = 300 def _schedule_dense_path_after_release(session): """Schedule a single path update to run after user stops dragging.""" if "2D Root" not in session.constraints or not session.constraints["2D Root"].dense_path: return tdata = session.timeline_data if tdata.get("dense_path_after_release_timer"): tdata["dense_path_after_release_timer"].cancel() delay = DENSE_PATH_AFTER_RELEASE_MS / 1000.0 def run(): if not demo.client_active(client_id): return sess = demo.client_sessions[client_id] tdata["dense_path_after_release_timer"] = None if "2D Root" not in sess.constraints or not sess.constraints["2D Root"].dense_path: return mot = list(sess.motions.values())[0] _update_dense_path(mot, sess) t = threading.Timer(delay, run) tdata["dense_path_after_release_timer"] = t t.start() @gui_dense_path_checkbox.on_update def _(event: viser.GuiEvent) -> None: session = get_active_session(event.client) if session is None: return if gui_dense_path_checkbox.value: # Make sure 0 and max_frame_idx keyframes are added to the constraint # since dense path should cover full duration for best model performance root_2d_track = session.timeline_data["tracks_ids"]["2D Root"] # add a locked keyframe at 0 start_keyframe_id = client.timeline.add_locked_keyframe( # noqa root_2d_track, 0, opacity=0.0, ) session.timeline_data["keyframes"][start_keyframe_id] = { "frame": 0, "track_id": root_2d_track, "locked": True, "opacity": 0.0, "value": None, } add_constraint_callback( start_keyframe_id, "2D Root", (0, 0), verbose=False, ) # add a locked keyframe at max_frame_idx end_keyframe_id = client.timeline.add_locked_keyframe( root_2d_track, session.max_frame_idx, opacity=0.0, ) session.timeline_data["keyframes"][end_keyframe_id] = { "frame": session.max_frame_idx, "track_id": root_2d_track, "locked": True, "opacity": 0.0, "value": None, } add_constraint_callback( end_keyframe_id, "2D Root", (session.max_frame_idx, session.max_frame_idx), verbose=False, ) # add a locked interval only for visual purposes locked_interval = client.timeline.add_locked_interval( # noqa root_2d_track, start_frame=0, end_frame=session.max_frame_idx, ) session.timeline_data["intervals"][locked_interval] = { "track_id": root_2d_track, "start_frame_idx": 0, "end_frame_idx": session.max_frame_idx, "locked": True, "opacity": 0.3, "value": None, } session.constraints["2D Root"].set_dense_path(gui_dense_path_checkbox.value) if session.constraints["2D Root"].dense_path: # update the character motion to reflect the full path # will be full length by construction, no need to specify min/max frame idx motion = list(session.motions.values())[0] _update_dense_path(motion, session) # remove locked interval and locked keyframes if not gui_dense_path_checkbox.value: # Get all locked keyframes keyframes_to_remove = [] for uuid, keyframe in client.timeline._keyframes.items(): if keyframe.locked: keyframes_to_remove.append(uuid) _data = session.timeline_data["keyframes"][uuid] remove_constraint_callback( uuid, constraint_type=session.timeline_data["tracks"][_data["track_id"]]["name"], frame_range=(_data["frame"], _data["frame"]), verbose=False, ) 
intervals_to_remove = [] # remove all locked intervals for uuid, interval in client.timeline._intervals.items(): if interval.locked: intervals_to_remove.append(uuid) # removing keyframes and intervals for uuid in keyframes_to_remove: client.timeline.remove_keyframe(uuid) for uuid in intervals_to_remove: client.timeline.remove_interval(uuid) apply_constraint_overlay_visibility(session) with client.gui.add_folder( "Load/Save", expand_by_default=False, visible=not HF_MODE, ): with client.gui.add_folder("Motion", expand_by_default=False): gui_save_motion_path_text = client.gui.add_text("Save Path", initial_value="output") gui_save_motion_format_dropdown = client.gui.add_dropdown( "Save Format", options=( ["NPZ", "CSV"] if "g1" in model_name.lower() else ["NPZ", "AMASS NPZ"] if "smplx" in model_name.lower() else ["NPZ", "BVH"] ), initial_value="NPZ", ) gui_save_bvh_standard_tpose_checkbox = client.gui.add_checkbox( "Standard T-pose", initial_value=False, hint="For BVH export, use the standard T-pose rest skeleton.", visible=False, ) gui_save_motion_button = client.gui.add_button( "Save Motion", hint="Save the current motion (format + path above)", ) gui_load_motion_path_text = client.gui.add_text( "Load Path", initial_value="output.npz", hint="SOMA .bvh, Kimodo or AMASS .npz, or G1 MuJoCo .csv", ) gui_load_motion_button = client.gui.add_button( "Load Motion", hint="Load the selected motion", ) with client.gui.add_folder("Constraints", expand_by_default=False): gui_save_constraints_path_text = client.gui.add_text( "Save Path", initial_value="output_constraints.json" ) gui_save_constraints_button = client.gui.add_button("Save Constraints") gui_load_constraints_path_text = client.gui.add_text( "Load Path", initial_value="output_constraints.json" ) gui_load_constraints_button = client.gui.add_button("Load Constraints") with client.gui.add_folder("Example", expand_by_default=False): gui_save_example_path_text = client.gui.add_text( "Save Dir", initial_value=os.path.join( demo.get_examples_base_dir(model_name, absolute=True), "custom_example_1", ), ) gui_save_example_button = client.gui.add_button("Save Example") gui_load_example_path_text = client.gui.add_text( "Load Dir", initial_value=os.path.join( demo.get_examples_base_dir(model_name, absolute=True), "custom_example_1", ), ) gui_load_gt_checkbox = client.gui.add_checkbox( "Load GT instead", initial_value=False, ) gui_load_example_from_path_button = client.gui.add_button("Load Example") def _get_primary_motion(session: ClientSession): return list(session.motions.values())[0] def _motion_to_numpy_dict(motion) -> dict[str, np.ndarray]: joints_pos = motion.joints_pos.detach().cpu().numpy() joints_rot = motion.joints_rot.detach().cpu().numpy() joints_local_rot = motion.joints_local_rot.detach().cpu().numpy() if joints_pos.ndim != 3: raise ValueError(f"Expected unbatched joints_pos with shape [T, J, 3], got {joints_pos.shape}") if joints_rot.ndim != 4: raise ValueError(f"Expected unbatched joints_rot with shape [T, J, 3, 3], got {joints_rot.shape}") if joints_local_rot.ndim != 4: raise ValueError( "Expected unbatched joints_local_rot with shape " f"[T, J, 3, 3], got {joints_local_rot.shape}" ) motion_data = { "posed_joints": joints_pos, "global_rot_mats": joints_rot, "local_rot_mats": joints_local_rot, "root_positions": joints_pos[:, motion.skeleton.root_idx, :], } if motion.foot_contacts is not None: foot_contacts = motion.foot_contacts.detach().cpu().numpy() if foot_contacts.ndim != 2: raise ValueError( f"Expected unbatched foot_contacts with shape 

def _coerce_save_path(raw_path: str, *, ext: str) -> str:
    """Ensure the save path ends with the correct extension for the chosen format."""
    name = (raw_path or "").strip()
    if name == "":
        return f"output{ext}"
    known_exts = (".npz", ".bvh", ".csv")
    if name.lower().endswith(known_exts):
        return os.path.splitext(name)[0] + ext
    if os.path.splitext(name)[1] == "":
        return name + ext
    return name

def save_motion(client, save_path, fmt):
    session = demo.client_sessions[client.client_id]
    motion = _get_primary_motion(session)
    motion_data = _motion_to_numpy_dict(motion)
    if fmt == "BVH":
        save_path = _coerce_save_path(save_path, ext=".bvh")
        save_motion_bvh(
            save_path,
            motion.joints_local_rot,
            motion.joints_pos[:, session.skeleton.root_idx, :],
            skeleton=session.skeleton,
            fps=float(session.model_fps),
            standard_tpose=bool(gui_save_bvh_standard_tpose_checkbox.value),
        )
    elif fmt == "CSV":
        save_path = _coerce_save_path(save_path, ext=".csv")
        data = g1_csv_to_bytes(motion_data, session.skeleton, demo.device)
        with open(save_path, "wb") as f:
            f.write(data)
    elif fmt == "AMASS NPZ":
        save_path = _coerce_save_path(save_path, ext=".npz")
        data = amass_npz_to_bytes(motion_data, session.skeleton, session.model_fps)
        with open(save_path, "wb") as f:
            f.write(data)
    else:
        save_path = _coerce_save_path(save_path, ext=".npz")
        save_kimodo_npz(save_path, motion_data)
    return save_path

@gui_save_motion_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    if get_active_session(event_client) is None:
        return
    raw_path = gui_save_motion_path_text.value
    fmt = str(gui_save_motion_format_dropdown.value).upper()
    try:
        saved_path = save_motion(event_client, raw_path, fmt)
        event_client.add_notification(
            title="Motion saved!",
            body=f"Saved motion to {saved_path}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to save motion!",
            body=str(e),
            auto_close_seconds=5.0,
            color="red",
        )
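
# Sketch of _coerce_save_path behavior, derived from the branches above
# (illustrative inputs, not test cases from the repo):
#   _coerce_save_path("",            ext=".npz") -> "output.npz"
#   _coerce_save_path("clip",        ext=".bvh") -> "clip.bvh"     # no extension: append
#   _coerce_save_path("clip.npz",    ext=".bvh") -> "clip.bvh"     # known extension: rewrite
#   _coerce_save_path("clip.backup", ext=".npz") -> "clip.backup"  # unknown extension: keep as-is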

def load_motion(client, load_path):
    session = demo.client_sessions[client.client_id]
    fps_arg = session.model_fps if session.model_fps and session.model_fps > 0 else None
    motion_dict, num_joints_motion = load_motion_file(load_path, target_fps=fps_arg)
    target_skel = registry_skeleton_for_joint_count(num_joints_motion)
    current_info = get_model_info(session.model_name)
    current_skel = current_info.skeleton if current_info is not None else None
    if current_skel != target_skel:
        dataset = current_info.dataset if current_info is not None else "RP"
        new_key = kimodo_short_key_for_skeleton_dataset(target_skel, dataset)
        if new_key is None:
            new_key = kimodo_short_key_for_skeleton_dataset(target_skel, "RP")
        if new_key is None:
            raise ValueError(
                f"No Kimodo model found for skeleton {target_skel} (motion has J={num_joints_motion})."
            )
        if new_key != session.model_name:
            gui_model_selector.set_from_short_key(new_key)
            apply_model_selection(new_key)
            _update_visibility_for_loaded_model(new_key)
            client.add_notification(
                title="Model switched",
                body=f"Switched to {new_key} to match loaded motion (J={num_joints_motion}).",
                auto_close_seconds=5.0,
                color="blue",
            )
            session = demo.client_sessions[client.client_id]
    joints_pos = motion_dict["posed_joints"].to(device=demo.device, dtype=torch.float32)
    joints_rot = motion_dict["global_rot_mats"].to(device=demo.device, dtype=torch.float32)
    foot_contacts = motion_dict.get("foot_contacts")
    if foot_contacts is not None:
        foot_contacts = foot_contacts.to(device=demo.device, dtype=torch.float32)
    # Support both batched [B, T, J, 3] and unbatched [T, J, 3]; take first sample if batched
    if joints_pos.ndim == 4:
        joints_pos = joints_pos[0]
    if joints_rot.ndim == 5:
        joints_rot = joints_rot[0]
    if foot_contacts is not None and foot_contacts.ndim == 3:
        foot_contacts = foot_contacts[0]
    # Motion must match the current model's skeleton after auto-switch
    num_joints_loaded = joints_pos.shape[1]
    num_joints_skeleton = session.skeleton.nbjoints
    if num_joints_loaded != num_joints_skeleton:
        # Backward compat: expand 30-joint SOMA motion to 77
        if (
            num_joints_loaded == 30
            and num_joints_skeleton == 77
            and isinstance(session.skeleton, SOMASkeleton77)
        ):
            from kimodo.skeleton import global_rots_to_local_rots

            skel30 = SOMASkeleton30().to(demo.device)
            if "local_rot_mats" in motion_dict:
                local_rot_30 = motion_dict["local_rot_mats"].to(device=demo.device, dtype=torch.float32)
                # Batched local rotations are [B, T, J, 3, 3]; take the first sample,
                # matching the joints_pos/joints_rot handling above.
                if local_rot_30.ndim == 5:
                    local_rot_30 = local_rot_30[0]
            else:
                local_rot_30 = global_rots_to_local_rots(joints_rot, skel30)
            local_rot_77 = skel30.to_SOMASkeleton77(local_rot_30)
            root_positions = joints_pos[:, skel30.root_idx, :]
            joints_rot, joints_pos, _ = session.skeleton.fk(local_rot_77, root_positions)
            if foot_contacts is not None and foot_contacts.shape[-1] == 4:
                foot_contacts = torch.cat(
                    [
                        foot_contacts[..., :2],
                        foot_contacts[..., 1:2],
                        foot_contacts[..., 2:4],
                        foot_contacts[..., 3:4],
                    ],
                    dim=-1,
                )
        else:
            raise ValueError(
                f"The loaded motion has {num_joints_loaded} joints but the current model "
                f"({session.model_name}) has {num_joints_skeleton} joints. "
                "Load a motion generated with the same skeleton, or switch the model to match the motion."
            )
    elif joints_rot.shape[1] != num_joints_skeleton:
        raise ValueError(
            f"Rotation data has {joints_rot.shape[1]} joints but the current model has "
            f"{num_joints_skeleton} joints. The NPZ may be corrupted or from a different skeleton."
        )
    # Apply G1 real robot projection (1-DoF per joint + axis limits) if enabled.
    if (
        "g1" in session.model_name
        and isinstance(session.skeleton, G1Skeleton34)
        and gui_real_robot_rotations_checkbox.value
    ):
        joints_pos, joints_rot = generation.apply_g1_real_robot_projection(
            session.skeleton, joints_pos, joints_rot
        )
    # Update duration and frame range based on loaded motion
    num_frames = joints_pos.shape[0]
    duration = num_frames / session.model_fps
    # Update GUI elements
    session.cur_duration = duration
    session.max_frame_idx = num_frames - 1
    # Clear existing motions and add the loaded one
    demo.clear_motions(client.client_id)
    demo.add_character_motion(
        client,
        session.skeleton,
        joints_pos,
        joints_rot,
        foot_contacts,
    )
    # Reset to frame 0
    demo.set_frame(client.client_id, 0)
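
# Note on the 4 -> 6 foot-contact expansion inside load_motion: the
# torch.cat above is equivalent to indexing the last dim with [0, 1, 1, 2, 3, 3],
# i.e. the second and fourth channels are duplicated. A compact equivalent
# (illustrative, not what the repo uses):
#   idx = torch.tensor([0, 1, 1, 2, 3, 3], device=foot_contacts.device)
#   foot_contacts = foot_contacts[..., idx]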
if ( "g1" in session.model_name and isinstance(session.skeleton, G1Skeleton34) and gui_real_robot_rotations_checkbox.value ): joints_pos, joints_rot = generation.apply_g1_real_robot_projection( session.skeleton, joints_pos, joints_rot ) # Update duration and frame range based on loaded motion num_frames = joints_pos.shape[0] duration = num_frames / session.model_fps # Update GUI elements session.cur_duration = duration session.max_frame_idx = num_frames - 1 # Clear existing motions and add the loaded one demo.clear_motions(client.client_id) demo.add_character_motion( client, session.skeleton, joints_pos, joints_rot, foot_contacts, ) # Reset to frame 0 demo.set_frame(client.client_id, 0) @gui_load_motion_button.on_click def _(event: viser.GuiEvent) -> None: event_client = event.client session = get_active_session(event_client) if session is None: return load_path = gui_load_motion_path_text.value loading_notif = event_client.add_notification( title="Loading motion...", body=f"Loading from {load_path}", loading=True, with_close_button=False, auto_close_seconds=None, ) try: load_motion(event_client, load_path) loading_notif.title = "Motion loaded!" loading_notif.body = f"Loaded motion from {load_path} ({session.max_frame_idx + 1} frames, {session.cur_duration:.2f}s)" loading_notif.loading = False loading_notif.with_close_button = True loading_notif.auto_close_seconds = 5.0 loading_notif.color = "green" except Exception as e: import traceback traceback.print_exc() loading_notif.title = "Failed to load motion!" loading_notif.body = str(e) loading_notif.loading = False loading_notif.with_close_button = True loading_notif.auto_close_seconds = 10.0 loading_notif.color = "red" def save_constraints(client, save_path): session = demo.client_sessions[client.client_id] # Keep save behavior aligned with demo frame convention: # valid frame indices are [0, max_frame_idx], so count is +1. 

def save_constraints(client, save_path):
    session = demo.client_sessions[client.client_id]
    # Keep save behavior aligned with demo frame convention:
    # valid frame indices are [0, max_frame_idx], so count is +1.
    num_frames = session.max_frame_idx + 1
    model_bundle = demo.load_model(session.model_name)
    constraints_lst = demo.compute_model_constraints_lst(session, model_bundle, num_frames)
    save_constraints_lst(save_path, constraints_lst)

@gui_save_constraints_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    if get_active_session(event_client) is None:
        return
    try:
        save_path = gui_save_constraints_path_text.value
        save_constraints(event_client, save_path)
        event_client.add_notification(
            title="Constraints saved!",
            body=f"Saved constraints to {save_path}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to save constraints!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )

def load_constraints(client, load_path):
    session = demo.client_sessions[client.client_id]
    constraints_lst = load_constraints_lst(load_path, skeleton=session.skeleton)
    # Clear existing constraints first
    with session.timeline_data["keyframe_update_lock"]:
        for constraint in list(session.constraints.values()):
            constraint.clear()
        client.timeline.clear_keyframes()
        client.timeline.clear_intervals()
    # Add loaded constraints to the session
    # We need to directly add constraint data, not read from current motion
    device = demo.device
    for constraint_obj in constraints_lst:
        constraint_type = constraint_obj.name
        # decompose the frame indices into intervals or single keyframes
        frame_indices = constraint_obj.frame_indices
        (
            intervals,
            intervals_indices,
            single_frames,
            single_frames_indices,
        ) = extract_intervals_and_singles(frame_indices)
        load_targets: list[dict] = []
        root_pos = None
        if constraint_type == "root2d":
            # smooth_root_2d is [T, 2] (x, z), convert to [T, 3] (x, 0, z)
            num_frames = constraint_obj.smooth_root_2d.shape[0]
            root_pos = torch.zeros(num_frames, 3, device=device)
            root_pos[:, 0] = constraint_obj.smooth_root_2d[:, 0]
            root_pos[:, 2] = constraint_obj.smooth_root_2d[:, 1]
            load_targets = [
                {
                    "track_name": "2D Root",
                    "constraint_track": session.constraints["2D Root"],
                }
            ]
        elif constraint_type == "fullbody":
            load_targets = [
                {
                    "track_name": "Full-Body",
                    "constraint_track": session.constraints["Full-Body"],
                }
            ]
        elif constraint_type in {
            "left-hand",
            "right-hand",
            "left-foot",
            "right-foot",
        }:
            track_name = {
                "left-hand": "Left Hand",
                "right-hand": "Right Hand",
                "left-foot": "Left Foot",
                "right-foot": "Right Foot",
            }[constraint_type]
            load_targets = [
                {
                    "track_name": track_name,
                    "constraint_track": session.constraints["End-Effectors"],
                    "joint_names": constraint_obj.joint_names,
                    "end_effector_type": constraint_type,
                }
            ]
        elif constraint_type in {"end-effector", "end-effectors"}:
            # Backward-compatible loader:
            # split a generic end-effector constraint into per-limb timeline tracks.
            joint_names_set = set(constraint_obj.joint_names)
            for jname, track_name, eff_type in [
                ("LeftHand", "Left Hand", "left-hand"),
                ("RightHand", "Right Hand", "right-hand"),
                ("LeftFoot", "Left Foot", "left-foot"),
                ("RightFoot", "Right Foot", "right-foot"),
            ]:
                if jname not in joint_names_set:
                    continue
                target_joint_names = [jname]
                if "Hips" in joint_names_set:
                    target_joint_names.append("Hips")
                load_targets.append(
                    {
                        "track_name": track_name,
                        "constraint_track": session.constraints["End-Effectors"],
                        "joint_names": target_joint_names,
                        "end_effector_type": eff_type,
                    }
                )
            if not load_targets:
                raise KeyError(
                    "No recognized end-effector joint in constraint "
                    f"joint_names={constraint_obj.joint_names}"
                )
        else:
            raise KeyError(f"Unsupported constraint type in loader: {constraint_type}")
        for target in load_targets:
            track_id = session.timeline_data["tracks_ids"][target["track_name"]]
            constraint_track = target["constraint_track"]
            # add intervals
            for (start_idx, end_idx), (start_idx_t, end_idx_t) in zip(intervals, intervals_indices):
                # Add to timeline
                interval_id = client.timeline.add_interval(track_id, start_idx, end_idx)
                session.timeline_data["intervals"][interval_id] = {
                    "track_id": track_id,
                    "start_frame_idx": start_idx,
                    "end_frame_idx": end_idx,
                    "locked": False,
                    "opacity": 1.0,
                    "value": None,
                }
                if constraint_type == "root2d":
                    constraint_track.add_interval(
                        interval_id,
                        start_idx,
                        end_idx,
                        root_pos[start_idx_t : end_idx_t + 1],
                    )
                elif constraint_type == "fullbody":
                    constraint_track.add_interval(
                        interval_id,
                        start_idx,
                        end_idx,
                        constraint_obj.global_joints_positions[start_idx_t : end_idx_t + 1],
                        constraint_obj.global_joints_rots[start_idx_t : end_idx_t + 1],
                    )
                else:
                    constraint_track.add_interval(
                        interval_id,
                        start_idx,
                        end_idx,
                        constraint_obj.global_joints_positions[start_idx_t : end_idx_t + 1],
                        constraint_obj.global_joints_rots[start_idx_t : end_idx_t + 1],
                        target["joint_names"],
                        target["end_effector_type"],
                    )
            # add keyframes
            for frame, frame_t in zip(single_frames, single_frames_indices):
                # Add to timeline
                keyframe_id = client.timeline.add_keyframe(track_id, frame)
                session.timeline_data["keyframes"][keyframe_id] = {
                    "track_id": track_id,
                    "frame": frame,
                    "locked": False,
                    "opacity": 1.0,
                    "value": None,
                }
                if constraint_type == "root2d":
                    constraint_track.add_keyframe(
                        keyframe_id,
                        frame,
                        root_pos[frame_t],
                    )
                elif constraint_type == "fullbody":
                    constraint_track.add_keyframe(
                        keyframe_id,
                        frame,
                        constraint_obj.global_joints_positions[frame_t],
                        constraint_obj.global_joints_rots[frame_t],
                    )
                else:
                    constraint_track.add_keyframe(
                        keyframe_id,
                        frame,
                        constraint_obj.global_joints_positions[frame_t],
                        constraint_obj.global_joints_rots[frame_t],
                        target["joint_names"],
                        target["end_effector_type"],
                    )

@gui_load_constraints_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    if get_active_session(event_client) is None:
        return
    try:
        load_path = gui_load_constraints_path_text.value
        load_constraints(event_client, load_path)
        session = demo.client_sessions[event_client.client_id]
        apply_constraint_overlay_visibility(session)
        event_client.add_notification(
            title="Constraints loaded!",
            body=f"Loaded constraints from {load_path}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to load constraints!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )
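
# Hedged sketch of what extract_intervals_and_singles is assumed to return,
# inferred from how load_constraints slices the constraint tensors above:
# maximal consecutive runs become intervals, isolated frames become
# keyframes, and the *_indices outputs are positions into the original
# frame_indices list (hypothetical values):
#   frame_indices = [3, 4, 5, 9, 12, 13]
#   intervals             == [(3, 5), (12, 13)]
#   intervals_indices     == [(0, 2), (4, 5)]
#   single_frames         == [9]
#   single_frames_indices == [3]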

with client.gui.add_folder("Exports", expand_by_default=False):
    with client.gui.add_folder("Screenshot", expand_by_default=False, visible=not HF_MODE):
        gui_screenshot_path_text = client.gui.add_text(
            "Save Path",
            initial_value="render.png",
            hint="Filename for the screenshot (PNG).",
        )
        gui_screenshot_button = client.gui.add_button(
            "Download Screenshot",
            hint="Capture the current canvas and download a PNG.",
        )
    with client.gui.add_folder("Video", expand_by_default=False, visible=not HF_MODE):
        gui_video_path_text = client.gui.add_text(
            "Save Path",
            initial_value="render.mp4",
            hint="Filename for the video (MP4).",
        )
        gui_video_button = client.gui.add_button(
            "Download Video",
            hint="Render every frame and download as MP4.",
        )
    with client.gui.add_folder("Motion", expand_by_default=True):
        gui_download_name_text = client.gui.add_text(
            "Name",
            initial_value="output",
            hint="Base filename to save as (extension will be added based on format if omitted).",
        )
        gui_download_format_dropdown = client.gui.add_dropdown(
            "Format",
            options=(
                ["NPZ", "CSV"]
                if "g1" in model_name.lower()
                else ["NPZ", "AMASS NPZ"]
                if "smplx" in model_name.lower()
                else ["NPZ", "BVH"]
            ),
            initial_value="NPZ",
        )
        gui_download_bvh_standard_tpose_checkbox = client.gui.add_checkbox(
            "Standard T-pose",
            initial_value=False,
            hint="For BVH export, use the standard T-pose rest skeleton.",
            visible=False,
        )
        gui_download_button = client.gui.add_button(
            "Download",
            hint="Download the current motion (format + name above).",
        )

def _download_bytes_to_browser(
    event_client: viser.ClientHandle,
    *,
    data: bytes,
    filename: str,
    mime_type: str = "application/octet-stream",
) -> None:
    """Trigger a browser download for an in-memory byte payload.

    Important: this intentionally does NOT use `showSaveFilePicker()` to avoid
    Chrome/Edge's file-write permission prompt ("this site can see edits you
    make"). If you want "always ask where to save", configure your browser
    download settings.
    """
    import base64
    import json

    # Base64 is the most robust way to move binary over our websocket JS channel.
    b64 = base64.b64encode(data).decode("ascii")
    js = f"""
    (() => {{
        const filename = {json.dumps(filename)};
        const mimeType = {json.dumps(mime_type)};
        const b64 = {json.dumps(b64)};
        // Decode base64 -> Uint8Array.
        const binStr = atob(b64);
        const bytes = new Uint8Array(binStr.length);
        for (let i = 0; i < binStr.length; i++) bytes[i] = binStr.charCodeAt(i);
        const blob = new Blob([bytes], {{ type: mimeType }});
        // Standard browser download behavior.
        const url = URL.createObjectURL(blob);
        const a = document.createElement("a");
        a.href = url;
        a.download = filename;
        document.body.appendChild(a);
        a.click();
        a.remove();
        URL.revokeObjectURL(url);
    }})();
    """
    # Reuse viser's JS execution mechanism (used for Plotly setup).
    from viser import _messages as _viser_messages

    event_client.gui._websock_interface.queue_message(  # type: ignore[attr-defined]
        _viser_messages.RunJavascriptMessage(source=js)
    )

def _motion_to_npz_bytes(motion) -> bytes:
    motion_data = _motion_to_numpy_dict(motion)
    return kimodo_npz_to_bytes(motion_data)

def _motion_to_csv_bytes(motion, session: ClientSession) -> bytes:
    motion_data = _motion_to_numpy_dict(motion)
    return g1_csv_to_bytes(motion_data, session.skeleton, demo.device)

def _motion_to_amass_npz_bytes(motion, session: ClientSession) -> bytes:
    motion_data = _motion_to_numpy_dict(motion)
    return amass_npz_to_bytes(motion_data, session.skeleton, session.model_fps)

def _get_motion_export_formats(loaded_model_name: str) -> list[str]:
    model_name_lower = (loaded_model_name or "").lower()
    if "g1" in model_name_lower:
        return ["NPZ", "CSV"]
    if "smplx" in model_name_lower:
        return ["NPZ", "AMASS NPZ"]
    return ["NPZ", "BVH"]

def _update_format_dropdown(dropdown, loaded_model_name: str) -> None:
    new_options = _get_motion_export_formats(loaded_model_name)
    current_value = str(dropdown.value)
    dropdown.options = new_options
    dropdown.value = current_value if current_value in new_options else new_options[0]

def _update_motion_export_dropdown(loaded_model_name: str) -> None:
    _update_format_dropdown(gui_download_format_dropdown, loaded_model_name)
    _update_format_dropdown(gui_save_motion_format_dropdown, loaded_model_name)
    _update_bvh_standard_tpose_visibility()

def _update_bvh_standard_tpose_visibility() -> None:
    gui_save_bvh_standard_tpose_checkbox.visible = (
        str(gui_save_motion_format_dropdown.value).upper() == "BVH"
    )
    gui_download_bvh_standard_tpose_checkbox.visible = (
        str(gui_download_format_dropdown.value).upper() == "BVH"
    )

@gui_save_motion_format_dropdown.on_update
def _(_event: viser.GuiEvent) -> None:
    _update_bvh_standard_tpose_visibility()

@gui_download_format_dropdown.on_update
def _(_event: viser.GuiEvent) -> None:
    _update_bvh_standard_tpose_visibility()

def _coerce_download_filename(raw_name: str, *, ext: str) -> str:
    """Coerce a user-entered filename to a safe basename with the desired extension.

    - If empty: uses "output{ext}"
    - If no extension: appends ext
    - If endswith a known export extension: rewrites extension to ext (prevents mismatches)
    - Any provided directory components are stripped
    """
    import os

    name = (raw_name or "").strip()
    name = os.path.basename(name.replace("\\", "/"))
    if name == "":
        return f"output{ext}"
    known_exts = (".npz", ".bvh", ".csv", ".png", ".mp4")
    lower = name.lower()
    if lower.endswith(known_exts):
        return os.path.splitext(name)[0] + ext
    root, cur_ext = os.path.splitext(name)
    if cur_ext == "":
        return name + ext
    return name
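
# Assumed examples of _coerce_download_filename, traced through the code
# above (illustrative, not repo test cases):
#   _coerce_download_filename("",            ext=".npz") -> "output.npz"
#   _coerce_download_filename("runs/clip",   ext=".bvh") -> "clip.bvh"      # directories stripped
#   _coerce_download_filename("clip.mp4",    ext=".npz") -> "clip.npz"      # known extension rewritten
#   _coerce_download_filename("clip.custom", ext=".npz") -> "clip.custom"   # unknown extension kept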

def _get_render_size(event_client: viser.ClientHandle) -> tuple[int, int]:
    width = int(event_client.camera.image_width)
    height = int(event_client.camera.image_height)
    if width <= 0 or height <= 0:
        # Fall back to a reasonable default if the camera hasn't synced yet.
        return (1280, 720)
    return (width, height)

def _round_up_to_multiple(value: int, multiple: int) -> int:
    if multiple <= 0:
        return value
    return ((value + multiple - 1) // multiple) * multiple

def _download_canvas_to_browser(event_client: viser.ClientHandle, *, filename: str) -> None:
    """Use the client-side canvas save path to avoid server-side renders."""
    import json

    js = f"""
    (() => {{
        const filename = {json.dumps(filename)};
        const canvases = Array.from(document.querySelectorAll("canvas"));
        if (!canvases.length) {{
            console.error("No canvases found to save.");
            return;
        }}
        // Pick the largest canvas by area (usually the main 3D view).
        const canvas = canvases.reduce((best, cur) => {{
            const bestArea = (best?.width || 0) * (best?.height || 0);
            const curArea = (cur?.width || 0) * (cur?.height || 0);
            return curArea > bestArea ? cur : best;
        }}, null);
        if (!canvas) {{
            console.error("No canvas selected to save.");
            return;
        }}
        canvas.toBlob((blob) => {{
            if (!blob) {{
                console.error("Export failed");
                return;
            }}
            const url = URL.createObjectURL(blob);
            const a = document.createElement("a");
            a.href = url;
            a.download = filename;
            document.body.appendChild(a);
            a.click();
            a.remove();
            URL.revokeObjectURL(url);
        }}, "image/png");
    }})();
    """
    from viser import _messages as _viser_messages

    event_client.gui._websock_interface.queue_message(  # type: ignore[attr-defined]
        _viser_messages.RunJavascriptMessage(source=js)
    )

@gui_screenshot_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    if get_active_session(event_client) is None:
        return
    try:
        filename = _coerce_download_filename(
            str(gui_screenshot_path_text.value),
            ext=".png",
        )
        _download_canvas_to_browser(event_client, filename=filename)
        event_client.add_notification(
            title="Screenshot download started",
            body=f"Saving {filename}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to download screenshot!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )
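
# Why the video path below rounds dimensions up to a multiple of 16: H.264
# encodes in 16x16 macroblocks, and ffmpeg warns (or silently pads) when the
# frame size isn't divisible by 16. Illustrative values:
#   _round_up_to_multiple(1278, 16) == 1280
#   _round_up_to_multiple(720, 16)  == 720   # already aligned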

@gui_video_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None:
        return
    recording_notification: viser.NotificationHandle | None = None
    try:
        recording_notification = event_client.add_notification(
            title="Recording video...",
            body="Saving frames, please wait.",
            loading=True,
            with_close_button=False,
            auto_close_seconds=None,
            color="blue",
        )
        event_client.timeline.disable_constraints()
        width, height = _get_render_size(event_client)
        # Avoid ffmpeg macro block resizing warnings.
        width = _round_up_to_multiple(width, 16)
        height = _round_up_to_multiple(height, 16)
        original_frame = session.frame_idx
        frames = []
        for frame_idx in range(session.max_frame_idx + 1):
            demo.set_frame(
                event_client.client_id,
                frame_idx,
                update_timeline=True,
            )
            frames.append(
                event_client.get_render(
                    height=height,
                    width=width,
                    transport_format="jpeg",
                )
            )
        # Restore the original frame (and timeline).
        demo.set_frame(event_client.client_id, original_frame)

        import imageio.v3 as iio

        filename = _coerce_download_filename(
            str(gui_video_path_text.value),
            ext=".mp4",
        )
        # "<bytes>" makes imwrite return the encoded MP4 as a bytes object
        # instead of writing to disk.
        payload = iio.imwrite(
            "<bytes>",
            frames,
            extension=".mp4",
            fps=float(session.model_fps),
            codec="h264",
            plugin="pyav",
        )
        event_client.send_file_download(filename, payload, save_immediately=True)
        event_client.add_notification(
            title="Video download started",
            body=f"Saving {filename}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to download video!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )
    finally:
        event_client.timeline.enable_constraints()
        if recording_notification is not None:
            recording_notification.remove()

@gui_download_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None:
        return
    motion = _get_primary_motion(session)
    try:
        fmt = str(gui_download_format_dropdown.value).upper()
        raw_name = str(gui_download_name_text.value)
        if fmt == "BVH":
            filename = _coerce_download_filename(raw_name, ext=".bvh")
            payload = motion_to_bvh_bytes(
                motion.joints_local_rot,
                motion.joints_pos[:, session.skeleton.root_idx, :],  # root positions
                skeleton=session.skeleton,
                fps=float(session.model_fps),
                standard_tpose=bool(gui_download_bvh_standard_tpose_checkbox.value),
            )
            mime = "text/plain"
        elif fmt == "CSV":
            filename = _coerce_download_filename(raw_name, ext=".csv")
            payload = _motion_to_csv_bytes(motion, session)
            mime = "text/csv"
        elif fmt == "AMASS NPZ":
            filename = _coerce_download_filename(raw_name, ext=".npz")
            payload = _motion_to_amass_npz_bytes(motion, session)
            mime = "application/octet-stream"
        else:
            # Default to NPZ (most common and matches existing save/load).
            filename = _coerce_download_filename(raw_name, ext=".npz")
            payload = _motion_to_npz_bytes(motion)
            mime = "application/octet-stream"
        _download_bytes_to_browser(
            event_client,
            data=payload,
            filename=filename,
            mime_type=mime,
        )
        event_client.add_notification(
            title="Download started",
            body=f"Saving {filename}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to download motion!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )

@gui_save_example_button.on_click
def _(event: viser.GuiEvent) -> None:
    from kimodo.tools import save_json

    event_client = event.client
    session = get_active_session(event_client)
    if session is None:
        return
    save_dir = gui_save_example_path_text.value
    if os.path.exists(save_dir):
        event_client.add_notification(
            title="Failed to save example!",
            body="Example directory already exists",
            auto_close_seconds=10.0,
            color="red",
        )
        return
    try:
        os.makedirs(save_dir)
        # save the constraints
        constraint_path = os.path.join(save_dir, "constraints.json")
        save_constraints(event_client, constraint_path)
        # save the motion
        motion_path = os.path.join(save_dir, "motion.npz")
        save_motion(event_client, motion_path, "NPZ")
        # save the gui metadata
        meta_path = os.path.join(save_dir, "meta.json")
        prompt_texts = []
        prompt_durations_sec = []
        prompt_values = sorted(
            [x for x in client.timeline._prompts.values()],
            key=lambda x: x.start_frame,
        )
        for i, prompt in enumerate(prompt_values):
            prompt_texts.append(prompt.text)
            # Match demo/generation convention:
            # non-last prompts: [start, end) ; last prompt: [start, end].
            n_frames = prompt.end_frame - prompt.start_frame
            if i == len(prompt_values) - 1:
                n_frames += 1
            prompt_durations_sec.append(n_frames / session.model_fps)
        if len(prompt_texts) == 1:
            meta_info = {
                "text": prompt_texts[0],
                "duration": prompt_durations_sec[0],
            }
        else:
            meta_info = {
                "texts": prompt_texts,
                "durations": prompt_durations_sec,
            }
        meta_info["num_samples"] = gui_num_samples_slider.value
        meta_info["seed"] = gui_seed.value
        meta_info["diffusion_steps"] = gui_diffusion_steps_slider.value
        meta_info["cfg"] = {
            "enabled": gui_cfg_checkbox.value,
            "text_weight": gui_cfg_text_weight_slider.value,
            "constraint_weight": gui_cfg_constraint_weight_slider.value,
        }
        save_json(meta_path, meta_info)
        # update the example dropdown
        session.example_dict = viser_utils.load_example_cases(session.examples_base_dir)
        update_examples_dropdown(session.example_dict, keep_selection=True)
        event_client.add_notification(
            title="Example saved!",
            body=f"Saved example to {save_dir}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to save example!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )

def set_new_duration(client_id, new_duration):
    session = demo.client_sessions[client_id]
    session.cur_duration = new_duration
    update_duration_gui(new_duration)
    session.max_frame_idx = int(session.cur_duration * session.model_fps - 1)
    if session.frame_idx > session.max_frame_idx:
        demo.set_frame(client_id, session.max_frame_idx)
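
# Frame-count convention used by set_new_duration (and save_constraints):
# durations are seconds, frame indices span [0, max_frame_idx]. E.g. at
# 30 fps a 4.0 s duration gives int(4.0 * 30 - 1) = 119, i.e. 120 frames.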

def apply_model_selection(new_model_name: str) -> None:
    session = demo.client_sessions[client_id]
    if new_model_name == session.model_name:
        return
    session.playing = False  # Pause playback when switching models.
    old_model_fps = session.model_fps
    old_duration = session.cur_duration
    old_prompts = [
        (prompt.text, prompt.start_frame, prompt.end_frame)
        for prompt in client.timeline._prompts.values()
    ]
    old_default_zoom_frames = client.timeline._default_num_frames_zoom
    old_max_zoom_frames = client.timeline._max_frames_zoom
    model_bundle = demo.load_model(new_model_name)
    # Clear motions and constraints when switching models.
    if session.edit_mode and session.motions:
        exit_editing_mode(session)
        session.edit_mode = False
    demo.clear_motions(client_id)
    with session.timeline_data["keyframe_update_lock"]:
        for constraint in list(session.constraints.values()):
            constraint.clear()
        session.constraints = demo.build_constraint_tracks(client, model_bundle.skeleton)
        session.timeline_data["keyframes"] = {}
        session.timeline_data["intervals"] = {}
        client.timeline.clear_keyframes()
        client.timeline.clear_intervals()
    session.model_name = new_model_name
    session.model_fps = model_bundle.model_fps
    session.skeleton = model_bundle.skeleton
    session.motion_rep = model_bundle.motion_rep
    session.cur_duration = old_duration
    session.max_frame_idx = int(session.cur_duration * session.model_fps - 1)
    session.frame_idx = 0
    session.edit_mode = False
    demo.set_timeline_defaults(client.timeline, session.model_fps)
    client.timeline.set_current_frame(0)
    gui_model_fps.value = session.model_fps
    update_duration_gui(session.cur_duration)
    if old_model_fps > 0:
        default_zoom_seconds = old_default_zoom_frames / old_model_fps
        max_zoom_seconds = old_max_zoom_frames / old_model_fps
        new_default_zoom = int(round(default_zoom_seconds * session.model_fps))
        new_max_zoom = int(round(max_zoom_seconds * session.model_fps))
        new_default_zoom = max(1, new_default_zoom)
        new_max_zoom = max(new_default_zoom, new_max_zoom)
        client.timeline.set_zoom_settings(
            default_num_frames_zoom=new_default_zoom,
            max_frames_zoom=new_max_zoom,
        )
    client.timeline.clear_prompts()
    if old_prompts and old_model_fps > 0:
        for i, (prompt_text, start_frame, end_frame) in enumerate(old_prompts):
            start_sec = start_frame / old_model_fps
            end_sec = end_frame / old_model_fps
            new_start = int(round(start_sec * session.model_fps))
            new_end = int(round(end_sec * session.model_fps))
            new_start = max(0, min(new_start, session.max_frame_idx))
            new_end = max(new_start, min(new_end, session.max_frame_idx))
            color = PROMPT_COLORS[i % len(PROMPT_COLORS)]
            client.timeline.add_prompt(prompt_text, new_start, new_end, color=color)
    session.examples_base_dir = demo.get_examples_base_dir(new_model_name, absolute=True)
    session.example_dict = viser_utils.load_example_cases(session.examples_base_dir)
    update_examples_dropdown(session.example_dict, keep_selection=False)
    gui_save_example_path_text.value = os.path.join(
        demo.get_examples_base_dir(new_model_name, absolute=True),
        "custom_example_1",
    )
    gui_load_example_path_text.value = os.path.join(
        demo.get_examples_base_dir(new_model_name, absolute=True),
        "custom_example_1",
    )
    demo.add_character_motion(client, session.skeleton)
    apply_constraint_overlay_visibility(session)
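
# Sketch of the prompt rescaling in apply_model_selection above: frame
# bounds are converted to seconds at the old fps and back to frames at the
# new fps, then clamped into [0, max_frame_idx]. E.g. a prompt spanning
# frames [15, 45] at 30 fps lands on [10, 30] after switching to a 20 fps
# model, since round(15 / 30 * 20) == 10 and round(45 / 30 * 20) == 30.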

def _update_version_and_display_from_dataset_skeleton() -> None:
    dataset_ui = gui_dataset_selector.value
    skeleton_display = gui_skeleton_selector.value
    skeleton_val = get_skeleton_key_from_display_name(skeleton_display)
    if skeleton_val is None:
        return
    models = get_models_for_dataset_skeleton(dataset_ui, skeleton_val, family="Kimodo")
    if not models:
        return
    gui_version_selector.options = [m.display_name for m in models]
    gui_version_selector.value = models[0].display_name
    gui_version_selector.visible = len(models) > 1
    gui_model_display.content = f"**Model:** {models[0].display_name}"

def _update_visibility_for_loaded_model(loaded_model_name: str) -> None:
    """Update model-specific controls from the currently loaded model only."""
    if not loaded_model_name:
        return
    _update_motion_export_dropdown(loaded_model_name)
    gui_use_soma_layer_checkbox.visible = "soma" in loaded_model_name
    _is_g1 = "g1" in loaded_model_name
    gui_real_robot_rotations_checkbox.visible = _is_g1
    gui_postprocess_checkbox.visible = not _is_g1
    gui_root_margin.visible = not _is_g1 and gui_postprocess_checkbox.value
    if _is_g1:
        gui_gizmo_space_dropdown.value = "Local"
    gui_gizmo_space_dropdown.visible = not _is_g1
    gui_gizmo_space_dropdown.disabled = _is_g1

def _on_load_model_click(event: viser.GuiEvent) -> None:
    """Load the currently selected model (called from Load model button)."""
    if get_active_session(event.client) is None:
        return
    new_model_name = gui_model_selector.value
    if not new_model_name:
        return
    info = get_model_info(new_model_name)
    if info is None:
        return
    session = demo.client_sessions[event.client.client_id]
    if new_model_name == session.model_name:
        return
    loading_notif = event.client.add_notification(
        title="Loading model...",
        body=f"Loading {info.display_name}",
        loading=True,
        with_close_button=False,
    )
    try:
        apply_model_selection(new_model_name)
        _update_visibility_for_loaded_model(new_model_name)
        loading_notif.title = "Model loaded"
        loading_notif.body = f"{info.display_name} is ready."
        loading_notif.loading = False
        loading_notif.with_close_button = True
        loading_notif.auto_close_seconds = 5.0
        loading_notif.color = "green"
    except Exception as e:
        loading_notif.loading = False
        loading_notif.with_close_button = True
        event.client.add_notification(
            title="Model failed to load",
            body=str(e),
            color="red",
            auto_close_seconds=10.0,
        )
        gui_model_selector.set_from_short_key(session.model_name)

@gui_load_model_button.on_click
def _(event: viser.GuiEvent) -> None:
    _on_load_model_click(event)

@gui_dataset_selector.on_update
def _(event: viser.GuiEvent) -> None:
    if get_active_session(event.client) is None:
        return
    skeleton_labels = get_allowed_skeleton_labels(gui_dataset_selector.value)
    gui_skeleton_selector.options = skeleton_labels
    gui_skeleton_selector.value = skeleton_labels[0] if skeleton_labels else ""
    _update_version_and_display_from_dataset_skeleton()

@gui_skeleton_selector.on_update
def _(event: viser.GuiEvent) -> None:
    if get_active_session(event.client) is None:
        return
    _update_version_and_display_from_dataset_skeleton()

@gui_version_selector.on_update
def _(event: viser.GuiEvent) -> None:
    if get_active_session(event.client) is None:
        return
    info = get_model_info(gui_model_selector.value)
    if info is not None:
        gui_model_display.content = f"**Model:** {info.display_name}"
demo.clear_motions(event.client.client_id) if current_motion is None: demo.add_character_motion(event.client, session.skeleton) else: demo.add_character_motion( event.client, session.skeleton, current_motion.joints_pos, current_motion.joints_rot, current_motion.foot_contacts, ) demo.set_frame(event.client.client_id, current_frame_idx) except Exception as e: print(e) event.client.add_notification( title="SOMA layer failed", body=str(e), color="red", auto_close_seconds=10.0, ) gui_use_soma_layer_checkbox.value = not gui_use_soma_layer_checkbox.value finally: loading_notif.loading = False loading_notif.with_close_button = True loading_notif.auto_close_seconds = 2.0 @gui_real_robot_rotations_checkbox.on_update def _(event: viser.GuiEvent) -> None: session = get_active_session(event.client) if session is None or "g1" not in session.model_name: return if not isinstance(session.skeleton, G1Skeleton34) or not session.motions: return if not gui_real_robot_rotations_checkbox.value: return # Reproject all displayed G1 motions to real robot DoF (1-DoF per joint + axis limits). from kimodo.skeleton import global_rots_to_local_rots current_frame_idx = session.frame_idx for motion in session.motions.values(): if motion.length <= 1: continue rest_pos = motion.joints_pos[0:1] rest_rot = motion.joints_rot[0:1] same_as_rest = (motion.joints_pos - rest_pos).abs().max().item() < 1e-6 and ( motion.joints_rot - rest_rot ).abs().max().item() < 1e-6 if same_as_rest: continue new_pos, new_rot = generation.apply_g1_real_robot_projection( session.skeleton, motion.joints_pos, motion.joints_rot, ) motion.joints_pos = new_pos motion.joints_rot = new_rot motion.joints_local_rot = global_rots_to_local_rots(new_rot, session.skeleton) # Refresh skeleton and skinned mesh caches so the viz uses new positions. motion.precompute_mesh_info() demo.set_frame(event.client.client_id, current_frame_idx) event.client.add_notification( title="Real robot projection applied", body="The motion is projected to G1 real robot DoF (1-DoF per joint, clamped to axis limits).", auto_close_seconds=4.0, color="green", ) def load_example_from_path( event_client: viser.ClientHandle, example_path: str, load_gt: bool = False, ) -> None: from kimodo.meta import parse_prompts_from_meta from kimodo.tools import load_json session = get_active_session(event_client) if session is None: return # Pause playback when loading an example. session.playing = False if not os.path.isdir(example_path): event_client.add_notification( title="Example path not found", body=f"Directory does not exist: {example_path}", auto_close_seconds=5.0, color="red", ) return # Long motions trigger a skinning precompute that can take several # seconds; show a persistent "loading" notification so the user # knows the app isn't frozen. Cleared in the finally block below. loading_notif = event_client.add_notification( title="Loading example...", body=f"Loading {os.path.basename(example_path.rstrip(os.sep))}. 

def load_example_from_path(
    event_client: viser.ClientHandle,
    example_path: str,
    load_gt: bool = False,
) -> None:
    from kimodo.meta import parse_prompts_from_meta
    from kimodo.tools import load_json

    session = get_active_session(event_client)
    if session is None:
        return
    # Pause playback when loading an example.
    session.playing = False
    if not os.path.isdir(example_path):
        event_client.add_notification(
            title="Example path not found",
            body=f"Directory does not exist: {example_path}",
            auto_close_seconds=5.0,
            color="red",
        )
        return
    # Long motions trigger a skinning precompute that can take several
    # seconds; show a persistent "loading" notification so the user
    # knows the app isn't frozen. Cleared in the finally block below.
    loading_notif = event_client.add_notification(
        title="Loading example...",
        body=f"Loading {os.path.basename(example_path.rstrip(os.sep))}. "
        "This may take a moment for long motions.",
        loading=True,
        with_close_button=False,
    )
    try:
        # constraints
        constraints_path = os.path.join(example_path, "constraints.json")
        if os.path.exists(constraints_path):
            load_constraints(event_client, constraints_path)
        else:
            # clear all existing constraints
            with session.timeline_data["keyframe_update_lock"]:
                for constraint in list(session.constraints.values()):
                    constraint.clear()
                event_client.timeline.clear_keyframes()
                event_client.timeline.clear_intervals()
        # motion
        motion_filename = "gt_motion.npz" if load_gt else "motion.npz"
        motion_path = os.path.join(example_path, motion_filename)
        if os.path.exists(motion_path):
            load_motion(event_client, motion_path)
        # metadata
        meta_path = os.path.join(example_path, "meta.json")
        if os.path.exists(meta_path):
            meta_info = load_json(meta_path)
            event_client.timeline.clear_prompts()
            texts, durations_sec = parse_prompts_from_meta(meta_info)
            fps = session.model_fps
            # Convert durations (seconds) to consecutive frame bounds
            num_frames = 0
            frame_bounds = []
            for i, d in enumerate(durations_sec):
                n_frames = max(1, int(round(d * fps)))
                start_frame = num_frames
                # Inverse of compute_prompt_num_frames():
                # non-last prompts end at next prompt start (exclusive),
                # last prompt includes its end frame.
                if i == len(durations_sec) - 1:
                    end_frame = num_frames + n_frames - 1
                else:
                    end_frame = num_frames + n_frames
                frame_bounds.append((start_frame, end_frame))
                num_frames += n_frames
            # Adapt timeline zoom to the loaded motion.
            target_visible_frames = int(math.ceil(1.10 * num_frames))
            event_client.timeline.set_zoom_settings(
                default_num_frames_zoom=target_visible_frames,
            )
            for i, (prompt_text, (start_frame, end_frame)) in enumerate(zip(texts, frame_bounds)):
                color = PROMPT_COLORS[i % len(PROMPT_COLORS)]
                event_client.timeline.add_prompt(prompt_text, start_frame, end_frame, color=color)
            update_duration_auto()
            # Only load optional fields if present
            if "num_samples" in meta_info:
                gui_num_samples_slider.value = meta_info["num_samples"]
            if "seed" in meta_info:
                gui_seed.value = meta_info["seed"]
            if "diffusion_steps" in meta_info:
                gui_diffusion_steps_slider.value = meta_info["diffusion_steps"]
            if "cfg" in meta_info:
                cfg = meta_info["cfg"]
                if "enabled" in cfg:
                    gui_cfg_checkbox.value = cfg["enabled"]
                if "text_weight" in cfg:
                    gui_cfg_text_weight_slider.value = cfg["text_weight"]
                if "constraint_weight" in cfg:
                    gui_cfg_constraint_weight_slider.value = cfg["constraint_weight"]
        # Set frame to 0 when example is loaded.
        session.frame_idx = 0
        event_client.timeline.set_current_frame(0)
        demo.set_frame(event_client.client_id, 0)
        event_client.add_notification(
            title="Example loaded!",
            body=f"Loaded example from {example_path}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to load example!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )
    finally:
        loading_notif.remove()

@gui_load_example_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None:
        return
    if not session.example_dict or (gui_examples_dropdown.value not in session.example_dict):
        event_client.add_notification(
            title="No examples available",
            body="No examples found for the selected model.",
            auto_close_seconds=5.0,
            color="red",
        )
        return
    example_path = session.example_dict[gui_examples_dropdown.value]
    load_example_from_path(event_client, example_path, gui_load_gt_checkbox.value)

@gui_load_example_from_path_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None:
        return
    example_path = gui_load_example_path_text.value
    if not example_path:
        event_client.add_notification(
            title="No example path",
            body="Please provide an example directory.",
            auto_close_seconds=5.0,
            color="red",
        )
        return
    load_example_from_path(event_client, example_path, gui_load_gt_checkbox.value)

@gui_cfg_checkbox.on_update
def _(_) -> None:
    if not demo.client_active(client_id):
        return
    val = gui_cfg_checkbox.value
    gui_cfg_text_weight_slider.visible = val
    gui_cfg_constraint_weight_slider.visible = val

def exit_editing_mode(session: ClientSession):
    gui_edit_constraint_button.label = "Enter Editing Mode"
    gui_generate_button.disabled = False
    gui_generate_button.label = "Generate"
    gui_reset_constraint_button.disabled = True
    if "g1" in session.model_name:
        gui_gizmo_space_dropdown.value = "Local"
        gui_gizmo_space_dropdown.disabled = True
        gui_gizmo_space_dropdown.visible = False
    else:
        gui_gizmo_space_dropdown.disabled = False
        gui_gizmo_space_dropdown.visible = True
    gui_undo_drag_button.disabled = True
    gui_use_soma_layer_checkbox.disabled = False
    session.edit_mode_snapshot = None
    session.undo_drag_snapshot = None
    motion = list(session.motions.values())[0]
    motion.clear_all_gizmos()
    motion.character.set_skinned_mesh_wireframe(False)
    motion.character.set_skeleton_visibility(False)
    motion.character.set_skinned_mesh_visibility(True)
    motion.character.set_skinned_mesh_opacity(1.0)
    session.gui_elements.gui_viz_skinned_mesh_opacity_slider.value = 1.0
    # If the path is dense, put the motion back on the path
    if "2D Root" in session.constraints and session.constraints["2D Root"].dense_path:
        _update_dense_path(motion, session)
    gui_viz_skinned_mesh_checkbox.value = True
    gui_viz_skeleton_checkbox.value = False

# enter editing mode callback
@gui_edit_constraint_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None:
        return
    session.edit_mode = not session.edit_mode
    edit_alert = "Entered editing mode"
    no_edit_alert = "Exited editing mode"
    edit_message = "You can now modify pose or path constraints."
    no_edit_message = "You can now generate motions."
    event_client.add_notification(
        title=edit_alert if session.edit_mode else no_edit_alert,
        body=edit_message if session.edit_mode else no_edit_message,
        auto_close_seconds=10.0,
        color="blue",
    )
    if session.edit_mode:
        gui_edit_constraint_button.label = "Exit Editing Mode"
        gui_generate_button.disabled = True
        gui_generate_button.label = "Generate Disabled In Editing Mode"
        if "g1" in session.model_name:
            gui_gizmo_space_dropdown.value = "Local"
            gui_gizmo_space_dropdown.disabled = True
        gui_use_soma_layer_checkbox.disabled = True
        assert len(session.motions) == 1, "Only one motion allowed in edit mode"
        motion = list(session.motions.values())[0]
        snapshot_frame_idx = min(session.frame_idx, motion.length - 1)
        session.edit_mode_snapshot = {}
        ensure_edit_snapshot(session, motion, snapshot_frame_idx)
        gui_reset_constraint_button.disabled = False
        motion.character.set_skeleton_visibility(True)
        # motion.character.set_skinned_mesh_wireframe(True)
        motion.character.set_skinned_mesh_opacity(0.65)
        session.gui_elements.gui_viz_skinned_mesh_opacity_slider.value = 0.65
        motion.character.set_skinned_mesh_visibility(True)
        gui_viz_skinned_mesh_checkbox.value = True
        gui_viz_skeleton_checkbox.value = True

        # need gizmos for root translation and individual joints
        def _on_root2d_gizmo_release():
            if "2D Root" in session.constraints and session.constraints["2D Root"].dense_path:
                mot = list(session.motions.values())[0]
                _update_dense_path(mot, session)

        def _on_gizmo_drag_start():
            mot = list(session.motions.values())[0]
            frame_idx = min(session.frame_idx, mot.length - 1)
            session.undo_drag_snapshot = {
                "frame_idx": frame_idx,
                "joints_pos": mot.get_joints_pos(frame_idx),
                "joints_rot": mot.get_joints_rot(frame_idx),
            }
            gui_undo_drag_button.disabled = False

        motion.add_root_translation_gizmo(
            session.constraints,
            on_2d_root_drag_end=_on_root2d_gizmo_release,
            on_drag_start=_on_gizmo_drag_start,
        )
        gizmo_space = "local" if "g1" in session.model_name else gui_gizmo_space_dropdown.value.lower()
        motion.add_joint_gizmos(
            session.constraints,
            space=gizmo_space,
            on_drag_start=_on_gizmo_drag_start,
        )
    else:
        exit_editing_mode(session)

@gui_reset_constraint_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None or not session.edit_mode_snapshot:
        return
    if not session.motions:
        return
    motion = list(session.motions.values())[0]
    snapshot_frame_idx = min(session.frame_idx, motion.length - 1)
    if snapshot_frame_idx not in session.edit_mode_snapshot:
        return
    motion.update_pose_at_frame(
        snapshot_frame_idx,
        joints_pos=session.edit_mode_snapshot[snapshot_frame_idx]["joints_pos"],
        joints_rot=session.edit_mode_snapshot[snapshot_frame_idx]["joints_rot"],
    )
    demo.set_frame(event_client.client_id, snapshot_frame_idx, update_timeline=False)

@gui_undo_drag_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None or session.undo_drag_snapshot is None:
        return
    if not session.motions:
        return
    motion = list(session.motions.values())[0]
    frame_idx = session.undo_drag_snapshot["frame_idx"]
    motion.update_pose_at_frame(
        frame_idx,
        joints_pos=session.undo_drag_snapshot["joints_pos"],
        joints_rot=session.undo_drag_snapshot["joints_rot"],
    )
    demo.set_frame(event_client.client_id, frame_idx, update_timeline=False)
    session.undo_drag_snapshot = None
    gui_undo_drag_button.disabled = True

def validate_interval(start_frame_idx: int, end_frame_idx: int, max_frame_idx: int) -> bool:
    if start_frame_idx < 0 or start_frame_idx > max_frame_idx:
        return False
    if end_frame_idx < 0 or end_frame_idx > max_frame_idx:
        return False
    if end_frame_idx < start_frame_idx:
        return False
    return True

def clamp_interval_to_range(
    start_frame_idx: int, end_frame_idx: int, max_frame_idx: int
) -> Optional[tuple[int, int]]:
    if end_frame_idx < 0 or start_frame_idx > max_frame_idx:
        return None
    start_clamped = max(0, start_frame_idx)
    end_clamped = min(max_frame_idx, end_frame_idx)
    if end_clamped < start_clamped:
        return None
    return start_clamped, end_clamped
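
# Illustrative behavior of the two helpers above:
#   validate_interval(0, 10, 100)          -> True
#   validate_interval(0, 101, 100)         -> False     # end past the last frame
#   clamp_interval_to_range(-5, 10, 100)   -> (0, 10)   # clamp start up
#   clamp_interval_to_range(90, 120, 100)  -> (90, 100) # clamp end down
#   clamp_interval_to_range(120, 130, 100) -> None      # fully out of range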
Couldn't add constraint.") return # collect input args for the constraint based on which track it is if is_interval: constraint_kwargs = { "interval_id": constraint_id, "start_frame_idx": start_frame_idx, "end_frame_idx": end_frame_idx, } else: constraint_kwargs = { "keyframe_id": constraint_id, "frame_idx": start_frame_idx, } if constraint_type in ["Full-Body", "End-Effectors"]: constraint_kwargs["joints_pos"] = motion.get_joints_pos(start_frame_idx, end_frame_idx) constraint_kwargs["joints_rot"] = motion.get_joints_rot(start_frame_idx, end_frame_idx) if constraint_type == "End-Effectors": constraint_kwargs["joint_names"] = joint_names constraint_kwargs["end_effector_type"] = end_effector_type elif constraint_type == "2D Root": constraint_kwargs["root_pos"] = motion.get_projected_root_pos(start_frame_idx, end_frame_idx) # add the keyframe(s) to the constraint track constraint = session.constraints[constraint_type] if is_interval: constraint.add_interval(**constraint_kwargs) else: constraint.add_keyframe(**constraint_kwargs) apply_constraint_overlay_visibility(session) if verbose: client.add_notification( title="Constraint added", body="", auto_close_seconds=5.0, color="blue", ) # timeline callbacks for keyframes and intervals @client.timeline.on_keyframe_add def _(keyframe_id: str, track_id: str, frame: int): """Called when a keyframe is added to a track.""" if not demo.client_active(client_id): return session = demo.client_sessions[client_id] with session.timeline_data["keyframe_update_lock"]: constraint_type = session.timeline_data["tracks"][track_id]["name"] add_constraint_callback( keyframe_id, constraint_type, (frame, frame), verbose=False, ) keyframe_data = client.timeline._keyframes.get(keyframe_id) session.timeline_data["keyframes"][keyframe_id] = { "frame": frame, "track_id": track_id, "locked": bool(keyframe_data.locked) if keyframe_data is not None else False, "opacity": keyframe_data.opacity if keyframe_data is not None else 1.0, "value": keyframe_data.value if keyframe_data is not None else None, } # Update smooth path when adding a keyframe (single action, not drag). 
if constraint_type == "2D Root" and session.constraints["2D Root"].dense_path: motion = list(session.motions.values())[0] _update_dense_path(motion, session) @client.timeline.on_interval_add def handle_interval_add(interval_id: str, track_id: str, start_frame: int, end_frame: int): """Called when an interval is added to a track.""" if not demo.client_active(client_id): return session = demo.client_sessions[client_id] with session.timeline_data["keyframe_update_lock"]: constraint_type = session.timeline_data["tracks"][track_id]["name"] add_constraint_callback( interval_id, constraint_type, (start_frame, end_frame), verbose=False, ) interval_data = client.timeline._intervals.get(interval_id) session.timeline_data["intervals"][interval_id] = { "track_id": track_id, "start_frame_idx": start_frame, "end_frame_idx": end_frame, "locked": bool(interval_data.locked) if interval_data is not None else False, "opacity": interval_data.opacity if interval_data is not None else 1.0, "value": interval_data.value if interval_data is not None else None, } if constraint_type == "2D Root" and session.constraints["2D Root"].dense_path: motion = list(session.motions.values())[0] _update_dense_path(motion, session) def remove_constraint_callback( constraint_id: str, constraint_type: str, frame_range: tuple[int, int], verbose: bool = True, ) -> None: if not demo.client_active(client_id): return session = demo.client_sessions[client_id] session.updating_motions = True is_interval = frame_range[1] != frame_range[0] start_frame_idx = int(frame_range[0]) end_frame_idx = int(frame_range[1]) if is_interval: clamped = clamp_interval_to_range(start_frame_idx, end_frame_idx, session.max_frame_idx) if clamped is None: return start_frame_idx, end_frame_idx = clamped else: if not validate_interval(start_frame_idx, end_frame_idx, session.max_frame_idx): print("Invalid interval! 
Couldn't remove constraint.") return if constraint_type in [ "Left Hand", "Right Hand", "Left Foot", "Right Foot", ]: constraint_type = "End-Effectors" constraint = session.constraints[constraint_type] if is_interval: constraint.remove_interval(constraint_id, start_frame_idx, end_frame_idx) else: constraint.remove_keyframe(constraint_id, start_frame_idx) if verbose: client.add_notification( title="Constraint removed", body="", auto_close_seconds=5.0, color="blue", ) @client.timeline.on_keyframe_move def handle_keyframe_move(keyframe_id: str, new_frame: int): """Called when a keyframe is moved to a new frame.""" # print(f"Keyframe moved: {keyframe_id} to frame {new_frame}") if not demo.client_active(client_id): return session = demo.client_sessions[client_id] # Cancel any pending timer for this keyframe timeline_data = session.timeline_data with timeline_data["keyframe_update_lock"]: if keyframe_id in timeline_data["keyframe_move_timers"]: timeline_data["keyframe_move_timers"][keyframe_id].cancel() # Store the latest target frame timeline_data["pending_keyframe_moves"][keyframe_id] = new_frame # Create a new timer to execute the actual move after a delay # This debounces rapid movements - only execute when user stops moving timer = threading.Timer( 0.03, # 10ms delay - adjust as needed _execute_keyframe_move, args=(client_id, keyframe_id, new_frame, session), ) timeline_data["keyframe_move_timers"][keyframe_id] = timer timer.start() def _execute_keyframe_move( client_id: int, keyframe_id: str, new_frame: int, session: ClientSession, ): """Actually execute the keyframe move operation (called after debounce delay).""" timeline_data = session.timeline_data with timeline_data["keyframe_update_lock"]: # Check if this move is still the latest one if keyframe_id not in timeline_data["pending_keyframe_moves"]: return # Move was cancelled if timeline_data["pending_keyframe_moves"][keyframe_id] != new_frame: return # A newer move superseded this one # Remove from pending del timeline_data["pending_keyframe_moves"][keyframe_id] if keyframe_id in timeline_data["keyframe_move_timers"]: del timeline_data["keyframe_move_timers"][keyframe_id] # Now execute the actual move (keep it in the lock so we don't delete it while moving) if keyframe_id not in timeline_data["keyframes"]: # double check return keyframe_data = timeline_data["keyframes"][keyframe_id] if not keyframe_data: return # if the frame did not move, don't do anything if keyframe_data["frame"] == new_frame: return track_id = keyframe_data["track_id"] constraint_type = timeline_data["tracks"][track_id]["name"] cur_frame = keyframe_data["frame"] # Remove constraint at old frame remove_constraint_callback( keyframe_id, constraint_type, (cur_frame, cur_frame), verbose=False, ) # Add constraint at new frame add_constraint_callback( keyframe_id, constraint_type, (new_frame, new_frame), verbose=False, ) # update our data keyframe_data["frame"] = new_frame # Schedule path update only after user stops dragging (no move for 300ms). 
if constraint_type == "2D Root": _schedule_dense_path_after_release(session) @client.timeline.on_keyframe_delete def handle_keyframe_delete(keyframe_id: str): """Called when a keyframe is deleted.""" if not demo.client_active(client_id): return session = demo.client_sessions[client_id] with session.timeline_data["keyframe_update_lock"]: if keyframe_id not in session.timeline_data["keyframes"]: return keyframe_data = session.timeline_data["keyframes"][keyframe_id] track_id = keyframe_data["track_id"] constraint_type = session.timeline_data["tracks"][track_id]["name"] cur_frame = keyframe_data["frame"] remove_constraint_callback( keyframe_id, constraint_type, (cur_frame, cur_frame), verbose=False, ) del session.timeline_data["keyframes"][keyframe_id] if constraint_type == "2D Root" and session.constraints["2D Root"].dense_path: motion = list(session.motions.values())[0] _update_dense_path(motion, session) @client.timeline.on_interval_move def handle_interval_move(interval_id: str, new_start: int, new_end: int): """Called when an interval is moved or resized.""" # print(f"Interval moved: {interval_id} to {new_start}-{new_end}") if not demo.client_active(client_id): return session = demo.client_sessions[client_id] # Cancel any pending timer for this interval # We share the same lock for keyframe and interval moves assuming the user can't move both at the same time timeline_data = session.timeline_data with timeline_data["keyframe_update_lock"]: if interval_id in timeline_data["keyframe_move_timers"]: timeline_data["keyframe_move_timers"][interval_id].cancel() # Store the latest target frame new_interval = (new_start, new_end) timeline_data["pending_keyframe_moves"][interval_id] = new_interval # Create a new timer to execute the actual move after a delay # This debounces rapid movements - only execute when user stops moving timer = threading.Timer( 0.5, # 100ms delay - adding interval is much slower than moving a keyframe _execute_interval_move, args=(client_id, interval_id, new_interval, session), ) timeline_data["keyframe_move_timers"][interval_id] = timer timer.start() def _execute_interval_move( client_id: int, interval_id: str, new_interval: tuple[int, int], session: ClientSession, ): """Actually execute the interval move operation (called after debounce delay).""" timeline_data = session.timeline_data with timeline_data["keyframe_update_lock"]: # Check if this move is still the latest one if interval_id not in timeline_data["pending_keyframe_moves"]: return # Move was cancelled if timeline_data["pending_keyframe_moves"][interval_id] != new_interval: return # A newer move superseded this one # Remove from pending del timeline_data["pending_keyframe_moves"][interval_id] if interval_id in timeline_data["keyframe_move_timers"]: del timeline_data["keyframe_move_timers"][interval_id] # Now execute the actual move if interval_id not in timeline_data["intervals"]: return interval_data = timeline_data["intervals"][interval_id] if not interval_data: return # if the interval did not move, don't do anything if ( interval_data["start_frame_idx"] == new_interval[0] and interval_data["end_frame_idx"] == new_interval[1] ): return track_id = interval_data["track_id"] constraint_type = timeline_data["tracks"][track_id]["name"] cur_range = ( interval_data["start_frame_idx"], interval_data["end_frame_idx"], ) # Remove constraint at old frame remove_constraint_callback( interval_id, constraint_type, cur_range, verbose=False, ) # Add constraint at new frame add_constraint_callback( interval_id, 
constraint_type, new_interval, verbose=False, ) # update our data interval_data["start_frame_idx"] = new_interval[0] interval_data["end_frame_idx"] = new_interval[1] # Schedule path update only after user stops dragging (no move for 300ms). if constraint_type == "2D Root": _schedule_dense_path_after_release(session) @client.timeline.on_interval_delete def handle_interval_delete(interval_id: str): """Called when an interval is deleted.""" if not demo.client_active(client_id): return session = demo.client_sessions[client_id] with session.timeline_data["keyframe_update_lock"]: if interval_id not in session.timeline_data["intervals"]: return interval_data = session.timeline_data["intervals"][interval_id] track_id = interval_data["track_id"] constraint_type = session.timeline_data["tracks"][track_id]["name"] remove_constraint_callback( interval_id, constraint_type, ( interval_data["start_frame_idx"], interval_data["end_frame_idx"], ), verbose=False, ) del session.timeline_data["intervals"][interval_id] if constraint_type == "2D Root" and session.constraints["2D Root"].dense_path: motion = list(session.motions.values())[0] _update_dense_path(motion, session) @gui_snap_to_constraint_button.on_click def _(event: viser.GuiEvent) -> None: event_client = event.client session = get_active_session(event_client) if session is None: return target_character_motion = list(session.motions.values())[0] frame_idx = session.frame_idx if frame_idx >= target_character_motion.length: # frame idx larger than the motion, could not snap return for constraint_name in ["Full-Body", "End-Effectors"]: if ( constraint_name in session.constraints and frame_idx in session.constraints[constraint_name].keyframes ): pos = session.constraints[constraint_name].keyframes[frame_idx]["joints_pos"] rot = session.constraints[constraint_name].keyframes[frame_idx]["joints_rot"] # update the full joints_pos of the character to match the constraints target_character_motion.update_pose_at_frame( frame_idx, joints_pos=pos, joints_rot=rot, ) target_character_motion.set_frame(frame_idx) return # motion already fully changed if "2D Root" in session.constraints and frame_idx in session.constraints["2D Root"].keyframes: # update only the root position new_root_pos = session.constraints["2D Root"].keyframes[frame_idx] old_root_pos = target_character_motion.get_projected_root_pos(frame_idx) root_diff = new_root_pos - old_root_pos root_diff[1] = 0.0 # don't change height new_joints_pos = ( target_character_motion.joints_pos[frame_idx] + to_torch( root_diff, device=target_character_motion.joints_pos.device, dtype=target_character_motion.joints_pos.dtype, )[None] ) rot = target_character_motion.joints_rot[frame_idx] target_character_motion.update_pose_at_frame( frame_idx, joints_pos=new_joints_pos, joints_rot=rot, ) target_character_motion.set_frame(frame_idx) @gui_clear_all_constraints_button.on_click def _(event: viser.GuiEvent) -> None: event_client = event.client session = get_active_session(event_client) if session is None: return with session.timeline_data["keyframe_update_lock"]: # use the lock here to wait for any constraint updates to finish for constraint in list(session.constraints.values()): constraint.clear() client.timeline.clear_keyframes() client.timeline.clear_intervals() if gui_dense_path_checkbox.value: gui_dense_path_checkbox.value = False if "2D Root" in session.constraints: session.constraints["2D Root"].set_dense_path(False) # generation callback @gui_generate_button.on_click def _(event: viser.GuiEvent) -> None: event_client 
= event.client session = get_active_session(event_client) if session is None: return generating_notif = event_client.add_notification( title="Generating motion...", body="Generating motions for the given prompt!", loading=True, with_close_button=False, ) gui_generate_button.disabled = True client.timeline.disable_constraints() num_samples = gui_num_samples_slider.value timeline = session.client.timeline # sort prompts by start frame so texts stay aligned with their frame ranges: prompt_values = sorted([x for x in timeline._prompts.values()], key=lambda x: x.start_frame) texts = [x.text for x in prompt_values] num_frames = compute_prompt_num_frames(prompt_values) # compute the total duration total_nb_frames = sum(num_frames) total_duration = total_nb_frames / session.model_fps # update just in case set_new_duration(client_id, total_duration) transitions_parameters = { "num_transition_frames": gui_num_transition_frames_slider.value, } # G1: postprocessing is disabled (does not work well for this model). postprocess_parameters = { "post_processing": (False if "g1" in session.model_name else gui_postprocess_checkbox.value), "root_margin": gui_root_margin.value, } try: demo.generate( event_client, texts, num_frames, num_samples, gui_seed.value, gui_diffusion_steps_slider.value, cfg_weight=[ gui_cfg_text_weight_slider.value, gui_cfg_constraint_weight_slider.value, ], cfg_type="separated" if gui_cfg_checkbox.value else "nocfg", postprocess_parameters=postprocess_parameters, transitions_parameters=transitions_parameters, real_robot_rotations=gui_real_robot_rotations_checkbox.value, ) session.max_frame_idx = int(session.cur_duration * session.model_fps) - 1 if session.frame_idx > session.max_frame_idx: session.frame_idx = session.max_frame_idx if num_samples > 1: # add mesh selector to choose character to commit def commit_motion(event: viser.GuiEvent) -> None: target = event.target commit_name = target.name.split("/")[1] # e.g. /character0/simple_skinned print(f"Committing motion for character: {commit_name}") # delete non-selected motions new_motion_kwargs = None for character_name, motion in session.motions.items(): if character_name == commit_name: new_motion_kwargs = { "skeleton": session.skeleton, "joints_rot": motion.joints_rot, "foot_contacts": motion.foot_contacts, } root_x_offset = motion.joints_pos[0, session.skeleton.root_idx, 0] new_joints_pos = motion.joints_pos.clone() new_joints_pos[..., 0] -= root_x_offset new_motion_kwargs["joints_pos"] = new_joints_pos break # clear and re-add the selected motion demo.clear_motions(event_client.client_id) demo.add_character_motion(event_client, **new_motion_kwargs) gui_edit_constraint_button.disabled = False gui_generate_button.disabled = False gui_snap_to_constraint_button.disabled = False client.timeline.enable_constraints() gui_generate_button.label = "Generate" gui_save_example_button.disabled = False gui_save_motion_button.disabled = False gui_download_button.disabled = False gui_save_constraints_button.disabled = False gui_load_example_button.disabled = False for motion in session.motions.values(): char = motion.character character_name = char.name # e.g. "character0" if char.skinned_mesh is not None: char.skinned_mesh.on_click(commit_motion) elif char.g1_mesh_rig is not None: # Register click on every part so any part can be clicked, # and use highlight_group so the whole robot highlights together.
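# Multi-sample commit flow (descriptive): while several samples are displayed,
# generation/saving stays disabled and every character mesh is a click target;
# commit_motion keeps only the clicked sample and subtracts its initial root
# x-offset so the committed motion starts at x = 0.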
for handle in char.g1_mesh_rig.mesh_handles: handle.on_click(commit_motion, highlight_group=character_name) gui_edit_constraint_button.disabled = True gui_generate_button.disabled = True gui_snap_to_constraint_button.disabled = True gui_generate_button.label = "Choose Sample Before Generating" gui_save_example_button.disabled = True gui_save_motion_button.disabled = True gui_download_button.disabled = True gui_save_constraints_button.disabled = True gui_load_example_button.disabled = True else: gui_edit_constraint_button.disabled = False gui_generate_button.disabled = False gui_snap_to_constraint_button.disabled = False client.timeline.enable_constraints() generating_notif.title = "Motion generation finished!" generating_notif.body = "Motions have been generated successfully for the given prompt." if num_samples > 1: generating_notif.body += " Now choose which sample to commit." generating_notif.loading = False generating_notif.with_close_button = True generating_notif.auto_close_seconds = 5.0 generating_notif.color = "green" # put the motion at zero demo.set_frame(client_id, 0) except Exception as e: import traceback traceback.print_exc() print(f"Error during generation for client {event_client.client_id}: {e}") # Re-enable buttons and notify the user if event_client.client_id in demo.client_sessions: session = demo.client_sessions[event_client.client_id] gui_generate_button.disabled = False gui_load_example_button.disabled = False gui_save_example_button.disabled = False gui_save_motion_button.disabled = False gui_download_button.disabled = False try: event_client.add_notification( title="Generation failed!", body=f"Error: {str(e)}", auto_close_seconds=5.0, color="red", ) except Exception: pass demo.check_cuda_health() # # Visualization settings # with tab_group.add_tab("Visualize", viser.Icon.EYE): with client.gui.add_folder("Playback", expand_by_default=True): gui_model_fps = client.gui.add_number("Model FPS", initial_value=model_fps, disabled=True) gui_playback_speed_buttons = client.gui.add_button_group( "Playback Speed", options=[ "0.5x", "1x", "2x", ], ) gui_playback_speed_buttons.value = "1x" @client.timeline.on_frame_change def handle_timeline_frame_change(new_frame_idx: int): """Update the frame when the user clicks on the timeline.""" demo.set_frame(client_id, new_frame_idx, update_timeline=False) session = demo.client_sessions.get(client_id) if session is not None: if session.edit_mode and session.motions: motion = list(session.motions.values())[0] snapshot_frame_idx = min(session.frame_idx, motion.length - 1) ensure_edit_snapshot(session, motion, snapshot_frame_idx) update_snap_to_constraint_button(session) @client.timeline.on_prompt_add async def _on_add( prompt_id: str, start_frame: int, end_frame: int, text: str, color: tuple[int, int, int] | None, ) -> None: update_duration_auto() @client.timeline.on_prompt_update async def _on_update(prompt_id: str, new_text: str) -> None: update_duration_auto() @client.timeline.on_prompt_resize async def _on_resize(prompt_id: str, new_start: int, new_end: int) -> None: update_duration_auto() @client.timeline.on_prompt_move async def _on_move(prompt_id: str, new_start: int, new_end: int) -> None: update_duration_auto() @client.timeline.on_prompt_delete async def _on_delete(prompt_id: str) -> None: update_duration_auto() def play_pause_button_callback(session: ClientSession): session.playing = not session.playing def next_frame_callback(session: ClientSession): if session.frame_idx < session.max_frame_idx: session.frame_idx += 1 if 
session.frame_idx == session.max_frame_idx: pass demo.set_frame(client_id, session.frame_idx) def prev_frame_callback(session: ClientSession): if session.frame_idx > 0: session.frame_idx -= 1 if session.frame_idx == 0: pass demo.set_frame(client_id, session.frame_idx) @gui_playback_speed_buttons.on_click def _(_) -> None: if not demo.client_active(client_id): return speed_map = { "0.5x": 0.5, "1x": 1.0, "2x": 2.0, } session = demo.client_sessions[client_id] session.playback_speed = speed_map[gui_playback_speed_buttons.value] with client.gui.add_folder("Body options", expand_by_default=True): gui_viz_skinned_mesh_checkbox = client.gui.add_checkbox("Show Mesh", initial_value=True) gui_viz_skinned_mesh_opacity_slider = client.gui.add_slider( "Mesh Opacity", min=0.0, max=1.0, step=0.01, initial_value=1.0 ) gui_viz_skeleton_checkbox = client.gui.add_checkbox("Show Skeleton", initial_value=False) gui_viz_foot_contacts_checkbox = client.gui.add_checkbox("Show Foot Contacts", initial_value=False) gui_viz_foot_contacts_checkbox.visible = gui_viz_skeleton_checkbox.value with client.gui.add_folder("Camera options", expand_by_default=True): gui_camera_fov_slider = client.gui.add_slider( "Camera FOV (deg)", min=30.0, max=90.0, step=1.0, initial_value=45.0, ) client.camera.fov = np.deg2rad(gui_camera_fov_slider.value) with client.gui.add_folder("Interface options", expand_by_default=True): gui_show_timeline_checkbox = client.gui.add_checkbox( "Show Timeline", initial_value=True, ) gui_show_constraint_tracks_checkbox = client.gui.add_checkbox( "Show Constraint tracks", initial_value=True, ) gui_show_constraint_labels_checkbox = client.gui.add_checkbox( "Show Constraint labels", initial_value=True, ) gui_show_starting_direction_checkbox = client.gui.add_checkbox( "Show Starting Direction", initial_value=True, ) gui_dark_mode_checkbox = client.gui.add_checkbox( "Dark Mode", initial_value=False, # Default to light mode ) gui_show_constraint_tracks_checkbox.visible = gui_show_timeline_checkbox.value demo.set_start_direction_visible(client_id, gui_show_starting_direction_checkbox.value) @gui_dark_mode_checkbox.on_update def _(_): # Apply the theme using configure_theme (pass uuid so titlebar toggle stays) demo.configure_theme( client, gui_dark_mode_checkbox.value, titlebar_dark_mode_checkbox_uuid=gui_dark_mode_checkbox.uuid, ) session = demo.client_sessions[client.client_id] for motion in session.motions.values(): motion.character.change_theme(gui_dark_mode_checkbox.value) # Show dark mode toggle in titlebar (right of Github), hide sidebar checkbox demo.configure_theme( client, gui_dark_mode_checkbox.value, titlebar_dark_mode_checkbox_uuid=gui_dark_mode_checkbox.uuid, ) gui_dark_mode_checkbox.visible = False @gui_show_constraint_labels_checkbox.on_update def _(_): if not demo.client_active(client_id): return session = demo.client_sessions[client_id] for constraint in session.constraints.values(): constraint.set_label_visibility(gui_show_constraint_labels_checkbox.value) @gui_show_timeline_checkbox.on_update def _(_): if not demo.client_active(client_id): return session = demo.client_sessions[client_id] session.client.timeline.set_visible(gui_show_timeline_checkbox.value) gui_show_constraint_tracks_checkbox.visible = gui_show_timeline_checkbox.value if gui_show_timeline_checkbox.value: demo.set_constraint_tracks_visible(session, gui_show_constraint_tracks_checkbox.value) @gui_show_constraint_tracks_checkbox.on_update def _(_): if not demo.client_active(client_id): return session = 
demo.client_sessions[client_id] demo.set_constraint_tracks_visible(session, gui_show_constraint_tracks_checkbox.value) @gui_show_starting_direction_checkbox.on_update def _(_): if not demo.client_active(client_id): return demo.set_start_direction_visible(client_id, gui_show_starting_direction_checkbox.value) @gui_viz_skeleton_checkbox.on_update def _(_) -> None: if not demo.client_active(client_id): return session = demo.client_sessions[client_id] gui_viz_foot_contacts_checkbox.visible = gui_viz_skeleton_checkbox.value if not gui_viz_skeleton_checkbox.value: gui_viz_foot_contacts_checkbox.value = False for motion in session.motions.values(): motion.character.set_skeleton_visibility(gui_viz_skeleton_checkbox.value) @gui_viz_foot_contacts_checkbox.on_update def _(_) -> None: if not demo.client_active(client_id): return session = demo.client_sessions[client_id] for motion in session.motions.values(): motion.character.set_show_foot_contacts( gui_viz_foot_contacts_checkbox.value, frame_idx=motion.cur_frame_idx ) @gui_viz_skinned_mesh_checkbox.on_update def _(_) -> None: if not demo.client_active(client_id): return session = demo.client_sessions[client_id] for motion in session.motions.values(): motion.character.set_skinned_mesh_visibility(gui_viz_skinned_mesh_checkbox.value) @gui_viz_skinned_mesh_opacity_slider.on_update def _(_) -> None: if not demo.client_active(client_id): return session = demo.client_sessions[client_id] for motion in session.motions.values(): motion.character.set_skinned_mesh_opacity(gui_viz_skinned_mesh_opacity_slider.value) @gui_camera_fov_slider.on_update def _(_) -> None: if not demo.client_active(client_id): return client.camera.fov = np.deg2rad(gui_camera_fov_slider.value) # # Instructions tab # with tab_group.add_tab("Instructions", viser.Icon.INFO_CIRCLE): client.gui.add_markdown(DEMO_UI_INSTRUCTIONS_TAB_MD) # # Keyboard events # space_pressed = [False] @client.scene.on_keyboard_event("keydown", debounce_ms=100) def handle_key(event: viser.KeyboardEvent) -> None: # Check if client session still exists if client_id not in demo.client_sessions: return session = demo.client_sessions[client_id] if event.event_type == "keyup": if event.key == " ": space_pressed[0] = False return # Space bar: only toggle on FIRST press if event.key == " ": if not space_pressed[0]: space_pressed[0] = True play_pause_button_callback(session) return # Handle arrow keys: frame navigation (fast OS repeat, rate-limited by the 100ms debounce).
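# Unlike the space bar, arrow keys have no first-press latch: holding a key
# lets OS key repeat step frames continuously, and the decorator's
# debounce_ms caps how often this handler fires.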
elif event.key == "ArrowLeft": prev_frame_callback(session) elif event.key == "ArrowRight": next_frame_callback(session) gui_elements = GuiElements( gui_play_pause_button=gui_play_pause_button, gui_next_frame_button=gui_next_frame_button, gui_prev_frame_button=gui_prev_frame_button, gui_generate_button=gui_generate_button, gui_model_fps=gui_model_fps, gui_timeline=gui_timeline, gui_viz_skeleton_checkbox=gui_viz_skeleton_checkbox, gui_viz_foot_contacts_checkbox=gui_viz_foot_contacts_checkbox, gui_viz_skinned_mesh_checkbox=gui_viz_skinned_mesh_checkbox, gui_viz_skinned_mesh_opacity_slider=gui_viz_skinned_mesh_opacity_slider, gui_camera_fov_slider=gui_camera_fov_slider, gui_duration_slider=gui_duration_slider, gui_num_samples_slider=gui_num_samples_slider, gui_cfg_checkbox=gui_cfg_checkbox, gui_cfg_text_weight_slider=gui_cfg_text_weight_slider, gui_cfg_constraint_weight_slider=gui_cfg_constraint_weight_slider, gui_diffusion_steps_slider=gui_diffusion_steps_slider, gui_seed=gui_seed, gui_postprocess_checkbox=gui_postprocess_checkbox, gui_root_margin=gui_root_margin, gui_real_robot_rotations_checkbox=gui_real_robot_rotations_checkbox, gui_dark_mode_checkbox=gui_dark_mode_checkbox, gui_use_soma_layer_checkbox=gui_use_soma_layer_checkbox, ) return ( gui_elements, timeline_tracks, example_dict, gui_examples_dropdown, gui_save_example_path_text, gui_model_selector, ) ================================================ FILE: kimodo/exports/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Export utilities: MuJoCo, BVH, SMPLX/AMASS, and motion I/O helpers.""" from .bvh import bvh_to_kimodo_motion, motion_to_bvh_bytes, read_bvh_frame_time_seconds, save_motion_bvh from .motion_convert_lib import convert_motion_files from .motion_formats import ( infer_npz_kind, infer_source_format_from_path, infer_target_format_from_path, resolve_source_fps, ) from .motion_io import ( KIMODO_CONVERT_TARGET_FPS, amass_npz_to_bytes, complete_motion_dict, g1_csv_to_bytes, kimodo_npz_to_bytes, load_amass_npz, load_g1_csv, load_kimodo_npz, load_kimodo_npz_as_torch, load_motion_file, motion_dict_to_numpy, save_kimodo_npz, save_kimodo_npz_at_target_fps, ) from .mujoco import MujocoQposConverter, apply_g1_real_robot_projection from .smplx import ( AMASSConverter, amass_npz_to_kimodo_motion, get_amass_parameters, kimodo_y_up_to_amass_coord_rotation_matrix, ) __all__ = [ "AMASSConverter", "KIMODO_CONVERT_TARGET_FPS", "MujocoQposConverter", "amass_npz_to_bytes", "amass_npz_to_kimodo_motion", "apply_g1_real_robot_projection", "bvh_to_kimodo_motion", "complete_motion_dict", "convert_motion_files", "g1_csv_to_bytes", "get_amass_parameters", "infer_npz_kind", "infer_source_format_from_path", "infer_target_format_from_path", "kimodo_npz_to_bytes", "kimodo_y_up_to_amass_coord_rotation_matrix", "load_amass_npz", "load_g1_csv", "load_kimodo_npz", "load_kimodo_npz_as_torch", "load_motion_file", "motion_dict_to_numpy", "motion_to_bvh_bytes", "read_bvh_frame_time_seconds", "resolve_source_fps", "save_kimodo_npz", "save_kimodo_npz_at_target_fps", "save_motion_bvh", ] ================================================ FILE: kimodo/exports/bvh.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Export utilities for converting internal motion representations into common file formats. This module is intended to hold lightweight serialization / export helpers that can be reused outside of interactive demos. """ import os import tempfile from pathlib import Path from typing import Tuple, Union import numpy as np import torch from kimodo.geometry import matrix_to_quaternion as _matrix_to_quaternion def _strip_end_site_blocks(bvh_text: str) -> str: """Remove all 'End Site { ... }' blocks from BVH text so output matches original format. bvhio adds an End Site for every leaf joint when writing; we do not set EndSite on joints, so we post-process the string to remove these blocks for Blender/original compatibility. """ lines = bvh_text.splitlines(keepends=True) result = [] i = 0 while i < len(lines): line = lines[i] if "End Site" in line: # Skip this line and the following block { ... }; brace-count to find closing } i += 1 if i < len(lines) and "{" in lines[i]: i += 1 depth = 1 while i < len(lines) and depth > 0: if "{" in lines[i]: depth += 1 if "}" in lines[i]: depth -= 1 i += 1 continue result.append(line) i += 1 return "".join(result) def _coerce_batch(name: str, x: torch.Tensor, *, expected_ndim: int) -> torch.Tensor: """Coerce (T, ...) or (1, T, ...) into (T, ...).""" if x.ndim == expected_ndim: return x if x.ndim == expected_ndim + 1: if int(x.shape[0]) != 1: raise ValueError( f"{name} has batch dimension B={int(x.shape[0])}, but BVH export " "only supports a single clip (B==1)." ) return x[0] raise ValueError(f"{name} must have shape (T, ...) or (1, T, ...); got {tuple(x.shape)}") def motion_to_bvh( local_rot_mats: torch.Tensor, root_positions: torch.Tensor, *, skeleton, fps: float, standard_tpose: bool = False, ) -> str: """Convert local rotations and root positions to BVH format; return UTF-8 string. Args: local_rot_mats: (T, J, 3, 3) or (1, T, J, 3, 3) local rotation matrices. root_positions: (T, 3) or (1, T, 3) root joint positions (e.g. from posed joints). skeleton: Skeleton with bone_order_names, bvh_neutral_joints, etc. fps: Frames per second for the motion. standard_tpose: If True, export with the rest pose being the standard T-pose rather than the rest pose consistent with the BONES-SEED dataset. Notes: BVH is plain-text. Root is named "Root" with ZYX rotation order; leaf joints have no End Site block. """ try: import bvhio # type: ignore[import-not-found] import glm # type: ignore[import-not-found] from SpatialTransform import Pose # type: ignore[import-not-found] except Exception as e: # pragma: no cover raise ImportError( "BVH export requires `bvhio` (and its deps `PyGLM` + `SpatialTransform`). " "Install with: `pip install bvhio`." 
) from e local_rot_mats = local_rot_mats.detach() root_positions = root_positions.detach() # SOMA: accept either somaskel30 (convert to 77) or somaskel77 (use as-is) if skeleton.name == "somaskel30": local_rot_mats = skeleton.to_SOMASkeleton77(local_rot_mats) skeleton = skeleton.somaskel77 if standard_tpose: neutral = skeleton.neutral_joints.detach().cpu().numpy() else: # transform local rots to the original rest pose consistent with the BONES-SEED dataset local_rot_mats, _ = skeleton.from_standard_tpose(local_rot_mats) neutral = skeleton.bvh_neutral_joints.detach().cpu().numpy() joint_names = list(skeleton.bone_order_names) parents = skeleton.joint_parents.detach().cpu().numpy().astype(int) root_idx = int(skeleton.root_idx) local_rot_mats = _coerce_batch("local_rot_mats", local_rot_mats, expected_ndim=4) T, J = local_rot_mats.shape[:2] q_wxyz = _matrix_to_quaternion(local_rot_mats).detach().cpu().numpy() # [T, J, 4] root_xyz = _coerce_batch("root_positions", root_positions, expected_ndim=2) root_xyz = root_xyz.cpu().numpy() # [T, 3] # Build BVH hierarchy: Root (wrapper at origin) -> Hips (pelvis with offset in meters) -> ... # Offsets are in meters to match the original format. children: dict[int, list[int]] = {i: [] for i in range(J)} for i, p in enumerate(parents): if p >= 0: children[int(p)].append(int(i)) _ROOT_CHANNELS = [ "Xposition", "Yposition", "Zposition", "Zrotation", "Yrotation", "Xrotation", ] _JOINT_CHANNELS = ["Zrotation", "Yrotation", "Xrotation"] # Scale from meters to centimeters (match original SEED data BVH scale). neutral = neutral * 100 root_xyz = root_xyz * 100 # Hips offset from Root: use skeleton neutral; if root is at origin (zeros), use a # nominal pelvis height so the hierarchy is non-degenerate in Blender. hips_offset = neutral[root_idx] if (hips_offset == 0).all(): hips_offset = np.array([0.0, 100.0, 0.0], dtype=neutral.dtype) # 1 m in cm def _make_joint(i: int) -> "bvhio.BvhJoint": name = joint_names[i] j = bvhio.BvhJoint(name, offset=glm.vec3(0, 0, 0)) if i == root_idx: # Hips: offset from Root (origin) in cm off = hips_offset j.Offset = glm.vec3(float(off[0]), float(off[1]), float(off[2])) j.Channels = _ROOT_CHANNELS.copy() else: p = int(parents[i]) off = neutral[i] - neutral[p] j.Offset = glm.vec3(float(off[0]), float(off[1]), float(off[2])) j.Channels = _JOINT_CHANNELS.copy() for c in children[i]: j.Children.append(_make_joint(c)) return j # Wrapper Root at origin; single child is Hips (skeleton root). root_wrapper = bvhio.BvhJoint("Root", offset=glm.vec3(0.0, 0.0, 0.0)) root_wrapper.Channels = _ROOT_CHANNELS.copy() root_wrapper.Children.append(_make_joint(root_idx)) root_joint = root_wrapper # Populate keyframes: Root = identity/zero, Hips = root motion, others = local rotation. 
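# Note on serialization: bvhio writes each Pose through the Channels declared
# above, so the "Root" wrapper and the Hips emit position + ZYX Euler values
# while every other joint emits ZYX Euler values only; the zero positions set
# for non-root joints below are never written out.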
bvh_layout = root_joint.layout() name_to_id = {n: idx for idx, n in enumerate(joint_names)} ordered_joint_ids = [] for bj, _, _ in bvh_layout: if bj.Name == "Root": ordered_joint_ids.append(None) else: ordered_joint_ids.append(name_to_id[bj.Name]) bvh_joints = [bj for bj, _, _ in bvh_layout] for bj in bvh_joints: bj.Keyframes = [None] * T # type: ignore[list-item] identity_quat = glm.quat(1.0, 0.0, 0.0, 0.0) zero_vec = glm.vec3(0.0, 0.0, 0.0) for t in range(T): for bj, jid in zip(bvh_joints, ordered_joint_ids): if jid is None: position = zero_vec rotation = identity_quat elif jid == root_idx: pos = root_xyz[t] position = glm.vec3(float(pos[0]), float(pos[1]), float(pos[2])) qw, qx, qy, qz = q_wxyz[t, jid] rotation = glm.quat(float(qw), float(qx), float(qy), float(qz)) else: position = zero_vec qw, qx, qy, qz = q_wxyz[t, jid] rotation = glm.quat(float(qw), float(qx), float(qy), float(qz)) bj.Keyframes[t] = Pose(position, rotation) # type: ignore[index] container = bvhio.BvhContainer(root_joint, frameCount=T, frameTime=1.0 / float(fps)) with tempfile.NamedTemporaryFile(mode="w", suffix=".bvh", delete=False, encoding="utf-8") as f: tmp_path = f.name try: bvhio.writeBvh(tmp_path, container, percision=6) bvh_text = Path(tmp_path).read_text(encoding="utf-8") return _strip_end_site_blocks(bvh_text) finally: try: os.remove(tmp_path) except Exception: pass def motion_to_bvh_bytes( local_rot_mats: torch.Tensor, root_positions: torch.Tensor, *, skeleton, fps: float, standard_tpose: bool = False, ) -> bytes: """Convert local rotations and root positions to BVH bytes (UTF-8). Convenience wrapper around :func:`motion_to_bvh`. """ return motion_to_bvh( local_rot_mats, root_positions, skeleton=skeleton, fps=fps, standard_tpose=standard_tpose, ).encode("utf-8") def save_motion_bvh( path: Union[str, Path], local_rot_mats: torch.Tensor, root_positions: torch.Tensor, *, skeleton, fps: float, standard_tpose: bool = False, ) -> None: """Write local rotations and root positions to a BVH file at the given path.""" Path(path).write_text( motion_to_bvh(local_rot_mats, root_positions, skeleton=skeleton, fps=fps, standard_tpose=standard_tpose), encoding="utf-8", ) def read_bvh_frame_time_seconds(path: Union[str, Path]) -> float: """Read ``Frame Time`` from a BVH file (seconds per frame).""" with open(path, encoding="utf-8") as f: for line in f: if "Frame Time:" in line: parts = line.split() return float(parts[-1]) raise ValueError(f"Could not find 'Frame Time:' in {path}") def bvh_to_kimodo_motion( path: Union[str, Path], skeleton=None, *, standard_tpose: bool = False, ) -> Tuple: """Load a Kimodo-style SOMA BVH into a Kimodo motion dict. Expects the same hierarchy as :func:`save_motion_bvh` (``Root`` wrapper + SOMA77 joints). The frame rate is always read from the BVH ``Frame Time`` header. Callers that need a different playback rate should resample the returned motion dict (see :func:`~kimodo.exports.motion_io.resample_motion_dict_to_kimodo_fps`). Returns: ``(motion_dict, source_fps)`` where ``source_fps`` is the native BVH frame rate read from the file header. 
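Example (illustrative; the path is a placeholder)::

    motion, src_fps = bvh_to_kimodo_motion("clip.bvh")  # SOMA77 skeleton by default
    print(motion["posed_joints"].shape)  # (T, 77, 3), at the native BVH rate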
""" from kimodo.exports.motion_io import complete_motion_dict from kimodo.skeleton.bvh import parse_bvh_motion from kimodo.skeleton.registry import build_skeleton if skeleton is None: skeleton = build_skeleton(77) device = skeleton.neutral_joints.device local_rot_mats, root_trans, bvh_fps = parse_bvh_motion(str(path)) local_rot_mats = local_rot_mats.to(device=device) root_trans = root_trans.to(device=device) if int(local_rot_mats.shape[1]) != int(skeleton.nbjoints): raise ValueError( f"BVH has {local_rot_mats.shape[1]} joints but skeleton has {skeleton.nbjoints}; " "use a Kimodo-exported SOMA BVH or matching skeleton." ) if not standard_tpose: local_rot_mats, _ = skeleton.to_standard_tpose(local_rot_mats) return complete_motion_dict(local_rot_mats, root_trans, skeleton, float(bvh_fps)), bvh_fps ================================================ FILE: kimodo/exports/motion_convert_lib.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Library API for converting between Kimodo NPZ, AMASS NPZ, SOMA BVH, and G1 MuJoCo CSV.""" from __future__ import annotations import warnings import numpy as np from kimodo.exports.bvh import bvh_to_kimodo_motion, save_motion_bvh from kimodo.exports.motion_formats import ( infer_source_format_from_path, infer_target_format_from_path, resolve_source_fps, ) from kimodo.exports.motion_io import ( load_amass_npz, load_g1_csv, load_kimodo_npz_as_torch, save_kimodo_npz_at_target_fps, ) from kimodo.exports.mujoco import MujocoQposConverter from kimodo.exports.smplx import AMASSConverter from kimodo.skeleton.registry import build_skeleton def convert_motion_files( input_path: str, output_path: str, *, from_fmt: str | None = None, to_fmt: str | None = None, source_fps: float | None = None, z_up: bool = True, mujoco_rest_zero: bool = False, bvh_standard_tpose: bool = False, ) -> None: """Convert a motion file between Kimodo-supported formats. Supported pairs (hub-and-spoke through Kimodo NPZ): - amass <-> kimodo - soma-bvh <-> kimodo - g1-csv <-> kimodo Args: input_path: Source file (``.npz``, ``.bvh``, or ``.csv``). output_path: Destination file. from_fmt: Source format; inferred from extension/contents when ``None``. to_fmt: Target format; inferred from extension when ``None``. source_fps: Source motion frame rate (Hz). If provided, trusted as-is. If ``None``, auto-detected from BVH ``Frame Time``, AMASS ``mocap_frame_rate``, or default 30. z_up: For AMASS conversions, apply the Z-up <-> Kimodo Y-up transform. mujoco_rest_zero: For G1 CSV, joint angles relative to MuJoCo rest pose. bvh_standard_tpose: If input or output is BVH: the BVH file uses the standard T-pose as its rest pose instead of the BONES-SEED rest pose. 
""" from_fmt = from_fmt or infer_source_format_from_path(input_path) to_fmt = to_fmt or infer_target_format_from_path(output_path, from_fmt) _validate_output_extension(to_fmt, output_path) pair = (from_fmt, to_fmt) if pair == ("amass", "kimodo"): sk = build_skeleton(22) effective_source = source_fps if effective_source is None: with np.load(input_path, allow_pickle=True) as z: effective_source = float(z["mocap_frame_rate"]) if "mocap_frame_rate" in z.files else 30.0 motion = load_amass_npz(input_path, source_fps=effective_source, z_up=z_up) save_kimodo_npz_at_target_fps(motion, sk, effective_source, output_path) return if pair == ("kimodo", "amass"): data, J = load_kimodo_npz_as_torch(input_path, ensure_complete=False) if J != 22: raise ValueError(f"Kimodo→AMASS requires 22 joints (SMPL-X); this file has J={J}.") sk = build_skeleton(22) effective_source = resolve_source_fps(source_fps, "kimodo", input_path, None) converter = AMASSConverter(fps=effective_source, skeleton=sk) converter.convert_save_npz(data, output_path, z_up=z_up) return if pair == ("soma-bvh", "kimodo"): sk = build_skeleton(77) motion, bvh_fps = bvh_to_kimodo_motion(input_path, skeleton=sk, standard_tpose=bvh_standard_tpose) effective_source = source_fps if source_fps is not None else bvh_fps save_kimodo_npz_at_target_fps(motion, sk, effective_source, output_path) return if pair == ("kimodo", "soma-bvh"): data, J = load_kimodo_npz_as_torch(input_path, ensure_complete=False) if J == 30: warnings.warn( f"Input has 30 joints (somaskel30); expanding to somaskel77 for BVH export.", UserWarning, stacklevel=2, ) sk = build_skeleton(30) elif J == 77: sk = build_skeleton(77) else: raise ValueError(f"Kimodo→BVH requires a SOMA skeleton (30 or 77 joints); this file has J={J}.") effective_source = resolve_source_fps(source_fps, "kimodo", input_path, None) save_motion_bvh( output_path, data["local_rot_mats"], data["root_positions"], skeleton=sk, fps=effective_source, standard_tpose=bvh_standard_tpose, ) return if pair == ("g1-csv", "kimodo"): sk = build_skeleton(34) effective_source = resolve_source_fps(source_fps, "g1-csv", input_path, None) motion = load_g1_csv(input_path, source_fps=effective_source, mujoco_rest_zero=mujoco_rest_zero) save_kimodo_npz_at_target_fps(motion, sk, effective_source, output_path) return if pair == ("kimodo", "g1-csv"): data, J = load_kimodo_npz_as_torch(input_path, ensure_complete=False) if J != 34: raise ValueError(f"Kimodo→CSV requires G1 with 34 joints; this file has J={J}.") sk = build_skeleton(34) effective_source = resolve_source_fps(source_fps, "kimodo", input_path, None) converter = MujocoQposConverter(sk) qpos = converter.dict_to_qpos( {k: v for k, v in data.items() if k in ("local_rot_mats", "root_positions")}, device=str(sk.neutral_joints.device), numpy=True, mujoco_rest_zero=mujoco_rest_zero, ) converter.save_csv(qpos, output_path) return raise ValueError( f"Unsupported conversion {from_fmt!r} → {to_fmt!r}. " "Supported: amass↔kimodo (SMPL-X NPZ), soma-bvh↔kimodo, g1-csv↔kimodo." 
) def _validate_output_extension(to_fmt: str, output_path: str) -> None: lower = output_path.lower() if to_fmt == "kimodo" and lower.endswith(".npz"): return if to_fmt == "amass": if not lower.endswith(".npz"): raise ValueError("AMASS output must use a .npz path.") elif to_fmt == "soma-bvh": if not lower.endswith(".bvh"): raise ValueError("SOMA BVH output must use a .bvh path.") elif to_fmt == "g1-csv": if not lower.endswith(".csv"): raise ValueError("G1 CSV output must use a .csv path.") ================================================ FILE: kimodo/exports/motion_formats.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Infer motion file formats from paths and NPZ contents.""" from __future__ import annotations import os from typing import Literal import numpy as np MotionSourceFormat = Literal["amass", "kimodo", "soma-bvh", "g1-csv"] MotionTargetFormat = Literal["amass", "kimodo", "soma-bvh", "g1-csv"] NpzMotionKind = Literal["amass", "kimodo"] def infer_npz_kind(path: str) -> NpzMotionKind: """Classify a ``.npz`` as AMASS SMPL-X or Kimodo from required array keys.""" with np.load(path, allow_pickle=False) as z: keys = set(z.files) if "trans" in keys and "pose_body" in keys and "root_orient" in keys: return "amass" if "local_rot_mats" in keys or "posed_joints" in keys: return "kimodo" raise ValueError( f"Unrecognized NPZ {path!r}: expected AMASS keys (trans, pose_body, ...) " "or Kimodo keys (local_rot_mats, posed_joints, ...)." ) def infer_source_format_from_path(path: str) -> MotionSourceFormat: """Infer converter input format from file extension and NPZ contents when needed.""" ext = os.path.splitext(path)[1].lower() if ext == ".bvh": return "soma-bvh" if ext == ".csv": return "g1-csv" if ext == ".npz": return infer_npz_kind(path) # type: ignore[return-value] raise ValueError(f"Cannot infer format from extension of {path!r}") def infer_target_format_from_path(path: str, from_fmt: MotionSourceFormat) -> MotionTargetFormat: """Infer converter output format from destination path and source format.""" ext = os.path.splitext(path)[1].lower() if ext == ".bvh": return "soma-bvh" if ext == ".csv": return "g1-csv" if ext == ".npz": if from_fmt == "amass": return "kimodo" if from_fmt == "kimodo": return "amass" if from_fmt in ("g1-csv", "soma-bvh"): return "kimodo" raise ValueError( "Ambiguous .npz output: set --to to 'kimodo' or 'amass' when the input format is not amass/kimodo." ) raise ValueError(f"Cannot infer output format from extension of {path!r}") def resolve_source_fps( fps: float | None, from_kind: str, input_path: str, data: dict | None, ) -> float: """Resolve source frame rate (Hz) for conversion when ``fps`` is not overridden.""" if fps is not None: return float(fps) if data is not None and "mocap_frame_rate" in data: return float(np.asarray(data["mocap_frame_rate"]).item()) if from_kind == "soma-bvh": from kimodo.exports.bvh import read_bvh_frame_time_seconds return 1.0 / read_bvh_frame_time_seconds(input_path) return 30.0 ================================================ FILE: kimodo/exports/motion_io.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Assemble Kimodo NPZ-compatible motion dicts from local rotations + root trajectory.""" from __future__ import annotations import os import warnings from typing import Any, Dict, Tuple import numpy as np import torch from kimodo.geometry import matrix_to_quaternion, quaternion_to_matrix from kimodo.motion_rep.feature_utils import compute_heading_angle, compute_vel_xyz from kimodo.motion_rep.feet import foot_detect_from_pos_and_vel from kimodo.motion_rep.smooth_root import get_smooth_root_pos from kimodo.skeleton import SkeletonBase from kimodo.skeleton.registry import build_skeleton from kimodo.tools import to_numpy # Default motion rate for Kimodo NPZ produced by format conversion (matches common model FPS). KIMODO_CONVERT_TARGET_FPS = 30.0 def _quaternion_slerp(q0: torch.Tensor, q1: torch.Tensor, t: torch.Tensor) -> torch.Tensor: """Spherical linear interpolation; ``q0``, ``q1`` (..., 4) wxyz; ``t`` broadcastable to (..., 1).""" if t.dim() < q0.dim(): t = t.unsqueeze(-1) dot = (q0 * q1).sum(dim=-1, keepdim=True) q1 = torch.where(dot < 0, -q1, q1) dot = torch.abs(dot).clamp(-1.0, 1.0) theta_0 = torch.acos(dot) sin_theta = torch.sin(theta_0) s0 = torch.sin((1.0 - t) * theta_0) / sin_theta.clamp(min=1e-8) s1 = torch.sin(t * theta_0) / sin_theta.clamp(min=1e-8) q = s0 * q0 + s1 * q1 return q / torch.linalg.norm(q, dim=-1, keepdim=True).clamp(min=1e-8) def resample_motion_dict_to_kimodo_fps( motion_dict: Dict[str, torch.Tensor], skeleton: SkeletonBase, source_fps: float, target_fps: float = KIMODO_CONVERT_TARGET_FPS, ) -> Tuple[Dict[str, torch.Tensor], bool]: """Resample a Kimodo motion dict to ``target_fps``. When the fps ratio is close to an integer (e.g. 120 / 30 = 4), the faster stepping method is used (take every *step*-th frame). Otherwise falls back to linear interp (root) + quaternion slerp (joints). Re-runs :func:`complete_motion_dict` at the target rate so derived channels stay consistent. Returns: The motion dict and ``True`` if time resampling was applied, else ``False`` (already at ``target_fps`` with matching frame count; only re-derived via FK). 
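Example (illustrative): 120 Hz -> 30 Hz takes every 4th frame (``step == 4``),
while 100 Hz -> 30 Hz (ratio 10/3, not near an integer) falls back to linear
root interpolation plus per-joint quaternion slerp.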
""" local_rot_mats = motion_dict["local_rot_mats"] root_positions = motion_dict["root_positions"] local_rot_mats, root_positions = _coerce_time_local_root(local_rot_mats, root_positions) t_in = int(local_rot_mats.shape[0]) if t_in < 1: raise ValueError("Motion must have at least one frame.") if source_fps <= 0: raise ValueError(f"source_fps must be positive; got {source_fps}") t_out = max(1, int(round(t_in * target_fps / source_fps))) if t_out == t_in and abs(float(source_fps) - float(target_fps)) < 1e-3: return complete_motion_dict(local_rot_mats, root_positions, skeleton, float(target_fps)), False ratio = source_fps / target_fps step = round(ratio) if step >= 2 and abs(ratio - step) < 0.05: local_out = local_rot_mats[::step] root_out = root_positions[::step] else: device = local_rot_mats.device dtype = local_rot_mats.dtype u = torch.linspace(0, t_in - 1, t_out, device=device, dtype=dtype) i0 = u.floor().long().clamp(0, t_in - 1) i1 = torch.minimum(i0 + 1, torch.tensor(t_in - 1, device=device)) tau_1d = (u - i0.float()).unsqueeze(-1) rp0 = root_positions[i0] rp1 = root_positions[i1] root_out = (1.0 - tau_1d) * rp0 + tau_1d * rp1 quats = matrix_to_quaternion(local_rot_mats) q0 = quats[i0] q1 = quats[i1] tau_q = (u - i0.float()).view(t_out, 1, 1) quat_out = _quaternion_slerp(q0, q1, tau_q) local_out = quaternion_to_matrix(quat_out) return complete_motion_dict(local_out, root_out, skeleton, float(target_fps)), True def warn_kimodo_npz_framerate(source_fps: float, t_before: int, t_after: int) -> None: """Emit a warning after time resampling for Kimodo NPZ (linear root, quaternion slerp per joint).""" warnings.warn( f"Resampled motion to {KIMODO_CONVERT_TARGET_FPS:.0f} Hz for Kimodo NPZ " f"(source ~{source_fps:.4g} Hz, {t_before} input frames → {t_after} output frames). " "Pass --source-fps if the detected source rate is wrong.", UserWarning, stacklevel=3, ) def _coerce_time_local_root( local_rot_mats: torch.Tensor, root_positions: torch.Tensor, ) -> tuple[torch.Tensor, torch.Tensor]: """Normalize to shapes (T, J, 3, 3) and (T, 3).""" if local_rot_mats.dim() == 5: if int(local_rot_mats.shape[0]) != 1: raise ValueError(f"local_rot_mats batch size must be 1 for single clip; got {local_rot_mats.shape[0]}") local_rot_mats = local_rot_mats[0] if root_positions.dim() == 3: if int(root_positions.shape[0]) != 1: raise ValueError(f"root_positions batch size must be 1; got {root_positions.shape[0]}") root_positions = root_positions[0] if local_rot_mats.dim() != 4: raise ValueError(f"local_rot_mats must be (T,J,3,3); got {tuple(local_rot_mats.shape)}") if root_positions.dim() != 2 or int(root_positions.shape[-1]) != 3: raise ValueError(f"root_positions must be (T,3); got {tuple(root_positions.shape)}") if int(local_rot_mats.shape[0]) != int(root_positions.shape[0]): raise ValueError("local_rot_mats and root_positions must have the same number of frames") return local_rot_mats, root_positions def complete_motion_dict( local_rot_mats: torch.Tensor, root_positions: torch.Tensor, skeleton: SkeletonBase, fps: float, ) -> Dict[str, torch.Tensor]: """Build the Kimodo motion output dict from local rotations and root positions. Matches keys written by CLI generation (see docs/source/user_guide/output_formats.md). Args: local_rot_mats: (T, J, 3, 3) or (1, T, J, 3, 3) local rotation matrices. root_positions: (T, 3) or (1, T, 3) root / pelvis world positions (meters). skeleton: Skeleton instance (SOMA77, G1, SMPL-X, etc.). fps: Sampling rate (Hz). 
Returns: Dict with tensors ``posed_joints``, ``global_rot_mats``, ``local_rot_mats``, ``foot_contacts``, ``smooth_root_pos``, ``root_positions``, ``global_root_heading``. """ device = local_rot_mats.device dtype = local_rot_mats.dtype local_rot_mats, root_positions = _coerce_time_local_root( local_rot_mats.to(device=device, dtype=dtype), root_positions.to(device=device, dtype=dtype), ) global_rot_mats, posed_joints, _ = skeleton.fk(local_rot_mats, root_positions) smooth_root_pos = get_smooth_root_pos(root_positions.unsqueeze(0)).squeeze(0) lengths = torch.tensor([posed_joints.shape[0]], device=device) velocities = compute_vel_xyz(posed_joints.unsqueeze(0), fps, lengths=lengths).squeeze(0) heading_angle = compute_heading_angle(posed_joints.unsqueeze(0), skeleton).squeeze(0) global_root_heading = torch.stack([torch.cos(heading_angle), torch.sin(heading_angle)], dim=-1) foot_contacts = foot_detect_from_pos_and_vel( posed_joints.unsqueeze(0), velocities.unsqueeze(0), skeleton, 0.15, 0.10, ).squeeze(0) return { "posed_joints": posed_joints, "global_rot_mats": global_rot_mats, "local_rot_mats": local_rot_mats, "foot_contacts": foot_contacts, "smooth_root_pos": smooth_root_pos, "root_positions": root_positions, "global_root_heading": global_root_heading, } def motion_dict_to_numpy(d: Dict[str, Any]) -> Dict[str, np.ndarray]: """Convert motion dict values to numpy arrays for ``np.savez``.""" out: Dict[str, np.ndarray] = {} for k, v in d.items(): if hasattr(v, "detach"): out[k] = to_numpy(v) elif isinstance(v, np.ndarray): out[k] = v else: out[k] = np.asarray(v) return out def save_kimodo_npz(path: str, motion_dict: Dict[str, Any]) -> None: """Save a Kimodo-compatible motion dict to ``.npz`` (numpy arrays).""" np.savez(path, **motion_dict_to_numpy(motion_dict)) def load_kimodo_npz(path: str) -> Dict[str, np.ndarray]: """Load arrays from a Kimodo ``.npz`` file.""" with np.load(path, allow_pickle=False) as data: return {k: np.asarray(data[k]) for k in data.files} def load_g1_csv( path: str, source_fps: float = KIMODO_CONVERT_TARGET_FPS, *, mujoco_rest_zero: bool = False, ) -> Dict[str, torch.Tensor]: """Load a G1 MuJoCo ``qpos`` CSV (``(T, 36)``) into a Kimodo motion dict. Args: path: CSV path (comma-separated, no header). source_fps: Source frame rate (Hz) of the CSV data. mujoco_rest_zero: Must match how the CSV was written (see :class:`MujocoQposConverter`). """ from kimodo.exports.mujoco import MujocoQposConverter qpos = np.loadtxt(path, delimiter=",") if qpos.ndim != 2 or qpos.shape[-1] != 36: raise ValueError(f"Expected G1 CSV with shape (T, 36); got {qpos.shape}") sk = build_skeleton(34) converter = MujocoQposConverter(sk) return converter.qpos_to_motion_dict(qpos, float(source_fps), mujoco_rest_zero=mujoco_rest_zero) def load_amass_npz( path: str, source_fps: float | None = None, *, z_up: bool = True, ) -> Dict[str, torch.Tensor]: """Load an AMASS-style SMPL-X ``.npz`` into a Kimodo motion dict (22 joints). Args: path: NPZ with ``trans``, ``root_orient``, ``pose_body``, etc. source_fps: Source frame rate (Hz); if ``None``, uses ``mocap_frame_rate`` from the file when present, else 30 Hz. z_up: If ``True``, apply AMASS Z-up to Kimodo Y-up transform (same as CLI). 
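Example (illustrative; the path is a placeholder)::

    motion = load_amass_npz("clip_amass.npz")  # 22-joint SMPL-X skeleton
    print(motion["root_positions"].shape)  # (T, 3), in kimodo y-up coordinates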
""" from kimodo.exports.smplx import amass_npz_to_kimodo_motion sk = build_skeleton(22) return amass_npz_to_kimodo_motion(path, sk, source_fps=source_fps, z_up=z_up) def load_kimodo_npz_as_torch( path: str, source_fps: float = KIMODO_CONVERT_TARGET_FPS, *, ensure_complete: bool = True, ) -> tuple[Dict[str, torch.Tensor], int]: """Load a Kimodo NPZ and return all arrays as torch tensors on the skeleton device. Args: path: Kimodo NPZ file path. source_fps: Source frame rate (Hz) used for derived channels when ``ensure_complete=True``. ensure_complete: If ``True`` and the NPZ lacks derived channels (``posed_joints``, ``global_rot_mats``, …), run :func:`complete_motion_dict` to fill them from ``local_rot_mats`` + ``root_positions``. If ``False``, load all arrays verbatim (requires ``local_rot_mats``). Returns: ``(tensor_dict, num_joints)`` """ raw = load_kimodo_npz(path) if "local_rot_mats" in raw: j = int(raw["local_rot_mats"].shape[1]) elif "posed_joints" in raw: j = int(raw["posed_joints"].shape[1]) else: raise ValueError("Kimodo NPZ must contain 'local_rot_mats' or 'posed_joints'.") sk = build_skeleton(j) device = sk.neutral_joints.device dtype = torch.float32 if not ensure_complete: if "local_rot_mats" not in raw: raise ValueError("Kimodo NPZ must contain 'local_rot_mats' (and typically 'root_positions').") out: Dict[str, torch.Tensor] = {} for k, v in raw.items(): out[k] = torch.from_numpy(np.asarray(v)).to(device=device, dtype=dtype) return out, j if "posed_joints" in raw and "global_rot_mats" in raw: out = {} for k, v in raw.items(): out[k] = torch.from_numpy(np.asarray(v)).to(device=device, dtype=dtype) return out, j if "local_rot_mats" not in raw or "root_positions" not in raw: raise ValueError("Kimodo NPZ must contain posed_joints+global_rot_mats, or local_rot_mats+root_positions.") local = torch.from_numpy(np.asarray(raw["local_rot_mats"])).to(device=device, dtype=dtype) root = torch.from_numpy(np.asarray(raw["root_positions"])).to(device=device, dtype=dtype) return complete_motion_dict(local, root, sk, float(source_fps)), j def save_kimodo_npz_at_target_fps( motion: Dict[str, torch.Tensor], skeleton: SkeletonBase, source_fps: float, output_path: str, target_fps: float = KIMODO_CONVERT_TARGET_FPS, ) -> None: """Resample a motion dict to ``target_fps`` when needed, then save Kimodo NPZ.""" t_before = int(motion["local_rot_mats"].shape[0]) motion, did_resample = resample_motion_dict_to_kimodo_fps(motion, skeleton, source_fps, target_fps) t_after = int(motion["local_rot_mats"].shape[0]) if did_resample: warn_kimodo_npz_framerate(source_fps, t_before, t_after) save_kimodo_npz(output_path, motion) def kimodo_npz_to_bytes(motion_dict: Dict[str, Any]) -> bytes: """Serialize a Kimodo motion dict to in-memory NPZ bytes.""" import io buf = io.BytesIO() np.savez(buf, **motion_dict_to_numpy(motion_dict)) return buf.getvalue() def g1_csv_to_bytes(motion_dict: Dict[str, Any], skeleton: SkeletonBase, device: Any) -> bytes: """Convert a motion dict to G1 MuJoCo CSV bytes via :class:`MujocoQposConverter`.""" import io from kimodo.exports.mujoco import MujocoQposConverter converter = MujocoQposConverter(skeleton) qpos = converter.dict_to_qpos( {k: v for k, v in motion_dict.items() if k in ("local_rot_mats", "root_positions")}, device, numpy=True, ) buf = io.StringIO() np.savetxt(buf, qpos, delimiter=",") return buf.getvalue().encode("utf-8") def amass_npz_to_bytes(motion_dict: Dict[str, Any], skeleton: SkeletonBase, fps: float) -> bytes: """Convert a motion dict to AMASS NPZ bytes via 
:class:`AMASSConverter`.""" import io from kimodo.exports.smplx import AMASSConverter converter = AMASSConverter(skeleton=skeleton, fps=fps) buf = io.BytesIO() converter.convert_save_npz( {k: v for k, v in motion_dict.items() if k in ("local_rot_mats", "root_positions")}, buf, ) return buf.getvalue() def _read_amass_source_fps(path: str) -> float: """Read the source frame rate from an AMASS NPZ, defaulting to 30 Hz.""" with np.load(path, allow_pickle=True) as z: if "mocap_frame_rate" in z.files: return float(z["mocap_frame_rate"]) return 30.0 def load_motion_file( path: str, source_fps: float | None = None, target_fps: float | None = None, *, z_up: bool = True, mujoco_rest_zero: bool = False, ) -> tuple[Dict[str, torch.Tensor], int]: """Load a motion file and return a Kimodo motion dict plus joint count. Supports SOMA BVH (``.bvh``), G1 MuJoCo CSV (``.csv``), Kimodo NPZ, and AMASS SMPL-X NPZ (``.npz``). The motion is loaded at its native (or overridden) source rate, then resampled to ``target_fps`` when they differ. Args: path: Path to ``.bvh``, ``.csv``, or ``.npz``. source_fps: Source frame rate (Hz). If provided, trusted as-is. If ``None``, auto-detected per format: BVH ``Frame Time`` header, AMASS ``mocap_frame_rate``, or :data:`KIMODO_CONVERT_TARGET_FPS` (30 Hz) for CSV / Kimodo NPZ. target_fps: Desired output frame rate (Hz). Defaults to :data:`KIMODO_CONVERT_TARGET_FPS` (30 Hz). The motion is resampled when ``source_fps`` and ``target_fps`` differ. z_up: AMASS NPZ only; passed to :func:`load_amass_npz`. mujoco_rest_zero: G1 CSV only; passed to :func:`load_g1_csv`. Returns: ``(motion_dict, num_joints)`` with the same keys as :func:`complete_motion_dict`. """ from kimodo.exports.motion_formats import infer_npz_kind if target_fps is None: target_fps = KIMODO_CONVERT_TARGET_FPS ext = os.path.splitext(path)[1].lower() if ext == ".bvh": from kimodo.exports.bvh import bvh_to_kimodo_motion motion_dict, bvh_fps = bvh_to_kimodo_motion(path) effective_source = source_fps if source_fps is not None else bvh_fps num_joints = int(motion_dict["local_rot_mats"].shape[1]) elif ext == ".csv": effective_source = source_fps if source_fps is not None else KIMODO_CONVERT_TARGET_FPS motion_dict = load_g1_csv(path, source_fps=effective_source, mujoco_rest_zero=mujoco_rest_zero) num_joints = 34 elif ext == ".npz": kind = infer_npz_kind(path) if kind == "amass": effective_source = source_fps if source_fps is not None else _read_amass_source_fps(path) motion_dict = load_amass_npz(path, source_fps=effective_source, z_up=z_up) num_joints = 22 else: effective_source = source_fps if source_fps is not None else KIMODO_CONVERT_TARGET_FPS motion_dict, num_joints = load_kimodo_npz_as_torch(path, source_fps=effective_source) else: raise ValueError(f"Unsupported motion file {path!r}; expected .bvh, .csv, or .npz") if abs(effective_source - target_fps) > 0.5: sk = build_skeleton(num_joints) motion_dict, did_resample = resample_motion_dict_to_kimodo_fps(motion_dict, sk, effective_source, target_fps) if did_resample: t_out = int(motion_dict["local_rot_mats"].shape[0]) warnings.warn( f"Resampled motion from {effective_source:.4g} Hz to " f"{target_fps:.0f} Hz ({t_out} frames).", UserWarning, stacklevel=2, ) return motion_dict, num_joints ================================================ FILE: kimodo/exports/mujoco.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Convert kimodo motion (y-up, z-forward) to MuJoCo qpos (z-up, x-forward) for G1 skeleton.""" import os import xml.etree.ElementTree as ET from typing import Optional import numpy as np import torch from scipy.spatial.transform import Rotation from kimodo.assets import skeleton_asset_path from kimodo.geometry import ( axis_angle_to_matrix, matrix_to_axis_angle, matrix_to_quaternion, quaternion_to_matrix, ) from kimodo.skeleton import G1Skeleton34, SkeletonBase, global_rots_to_local_rots from kimodo.tools import ensure_batched, to_numpy, to_torch # Cache so that the same (skeleton, xml_path) returns the same converter instance. _converter_cache: dict[tuple[int, str], "MujocoQposConverter"] = {} class MujocoQposConverter: """Fast batch converter from our dictionary format to mujoco qpos with precomputed transforms. In MuJoCo, the coordinate system is z-up and x-forward, right-handed. Joints (30): free root (pelvis, 7 qpos values = 3 translation + 4 quaternion) + 29 hinge joints (29 qpos values). In kimodo, the coordinate system is y-up and z-forward, right-handed. Joints (34): root (pelvis) + 33 body joints; 4 of these are end-effector joints added by kimodo. Cached by (input_skeleton id, xml_path); repeated calls with the same args return the same instance. """ def __new__( cls, input_skeleton: SkeletonBase, xml_path: str = str(skeleton_asset_path("g1skel34", "xml", "g1.xml")), ): key = (id(input_skeleton), xml_path) if key not in _converter_cache: inst = object.__new__(cls) _converter_cache[key] = inst return _converter_cache[key] def __init__( self, input_skeleton: SkeletonBase, xml_path: str = str(skeleton_asset_path("g1skel34", "xml", "g1.xml")), ): """Initialize converter with precomputed transforms. Args: input_skeleton: Kimodo skeleton (G1, 34 joints) providing joint names and ordering. xml_path: Path to the mujoco XML file containing joint definitions. """ if getattr(self, "_initialized", False): return self.xml_path = xml_path self.skeleton = input_skeleton self._prepare_transforms() self._subtree_joints = {} self._initialized = True def _prepare_transforms(self): """Precompute all necessary transforms for efficient batch processing.""" # Define coordinate transformations between mujoco and kimodo space # 1) R_zup_to_yup: rotation around x-axis by -90 degrees # 2) x_forward_to_y_forward: rotation around z-axis by -90 degrees # Combined transformation matrix: mujoco_to_kimodo = R_zup_to_yup * x_forward_to_y_forward self.mujoco_to_kimodo_matrix = torch.tensor( [[0.0, 1.0, 0.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0]], dtype=torch.float32 ) self.kimodo_to_mujoco_matrix = self.mujoco_to_kimodo_matrix.T # Inverse transformation: kimodo_to_mujoco # Parse XML once and extract joint information tree = ET.parse(self.xml_path) root = tree.getroot() xml_classes = [x for x in tree.findall(".//default") if "class" in x.attrib] joint_axes = dict() class_ranges: dict[str, tuple[float, float]] = {} for xml_class in xml_classes: j = xml_class.findall("joint") if j: joint_axes[xml_class.get("class")] = j[0].get("axis") range_str = j[0].get("range") if range_str: range_vals = [float(x) for x in range_str.split()] if len(range_vals) == 2: class_ranges[xml_class.get("class")] = ( range_vals[0], range_vals[1], ) mujoco_hinge_joints = root.find("worldbody").findall(".//joint") # skip the base joint self._mujoco_joint_axis_values_kimodo_space = torch.zeros( (len(mujoco_hinge_joints), 3), dtype=torch.float32 ) # mujoco order but kimodo space self._mujoco_joint_axis_values_mujoco_space = torch.zeros( (len(mujoco_hinge_joints), 3), dtype=torch.float32 ) # mujoco
order but mujoco space # for the below indices, mujoco_indices_to_kimodo_indices does not include mujoco root (30 - 1 = 29 elements), # while kimodo_indices_to_mujoco_indices includes the kimodo root (nbjoints = 34 elements). self._mujoco_indices_to_kimodo_indices = torch.zeros((len(mujoco_hinge_joints),), dtype=torch.int32) self._kimodo_indices_to_mujoco_indices = ( torch.ones((self.skeleton.nbjoints,), dtype=torch.int32) * -1 ) # -1 means not in the csv skeleton self._nb_joints_mujoco = len(mujoco_hinge_joints) + 1 self._nb_joints_kimodo = self.skeleton.nbjoints self._mujoco_joint_including_root_parent_list = torch.full( (len(mujoco_hinge_joints) + 1,), -1, dtype=torch.int32 ) self._mujoco_joint_including_root_list = ["pelvis_skel"] for joint_id_in_csv, joint in enumerate(mujoco_hinge_joints): joint_name_in_skeleton = joint.get("name").replace("_joint", "_skel") joint_parent_name_in_skeleton = self.skeleton.bone_parents[joint_name_in_skeleton] self._mujoco_joint_including_root_list.append(joint_name_in_skeleton) self._mujoco_joint_including_root_parent_list[joint_id_in_csv + 1] = ( self._mujoco_joint_including_root_list.index(joint_parent_name_in_skeleton) ) joint_idx_in_kimodo_skeleton = self.skeleton.bone_order_names.index(joint_name_in_skeleton) axis_values = [float(x) for x in (joint.get("axis") or joint_axes[joint.get("class")]).split(" ")] # the mapped axis in kimodo skeleton space is calculated as bones_axis = mujoco_to_kimodo.apply(axis_values) # [1, 0, 0] -> [0, 0, 1]; [0, 1, 0] -> [1, 0, 0]; [0, 0, 1] -> [0, 1, 0] mujoco_joint_axis_mapping_kimodo_space = [ torch.tensor([0, 0, 1]), torch.tensor([1, 0, 0]), torch.tensor([0, 1, 0]), ][np.argmax(axis_values)] self._mujoco_joint_axis_values_kimodo_space[joint_id_in_csv] = mujoco_joint_axis_mapping_kimodo_space self._mujoco_joint_axis_values_mujoco_space[joint_id_in_csv] = torch.tensor(axis_values) self._mujoco_indices_to_kimodo_indices[joint_id_in_csv] = joint_idx_in_kimodo_skeleton self._kimodo_indices_to_mujoco_indices[joint_idx_in_kimodo_skeleton] = ( joint_id_in_csv + 1 ) # +1 for the root self._kimodo_indices_to_mujoco_indices[0] = 0 # the root joint mapping # Joint limits (min, max) in radians for each mujoco hinge, for clamping self._joint_limits_min = torch.full((len(mujoco_hinge_joints),), float("-inf"), dtype=torch.float32) self._joint_limits_max = torch.full((len(mujoco_hinge_joints),), float("inf"), dtype=torch.float32) for joint_id_in_csv, joint in enumerate(mujoco_hinge_joints): range_vals = None if joint.get("range"): range_vals = [float(x) for x in joint.get("range").split()] elif joint.get("class") and joint.get("class") in class_ranges: lo, hi = class_ranges[joint.get("class")] range_vals = [lo, hi] if range_vals is not None and len(range_vals) == 2: self._joint_limits_min[joint_id_in_csv] = range_vals[0] self._joint_limits_max[joint_id_in_csv] = range_vals[1] # load the offset matrices from the xml R_zup_to_yup = Rotation.from_euler("x", -90, degrees=True) x_forward_to_y_forward = Rotation.from_euler("z", -90, degrees=True) mujoco_to_kimodo = R_zup_to_yup * x_forward_to_y_forward self._rot_offsets_q2t = torch.zeros(len(self._kimodo_indices_to_mujoco_indices), 3, 3, dtype=torch.float32) self._rot_offsets_q2t[...] = torch.eye(3)[None] self._rot_offsets_f2q = torch.zeros(len(self._kimodo_indices_to_mujoco_indices), 3, 3, dtype=torch.float32) self._rot_offsets_f2q[...]
= torch.eye(3)[None] parent_map = {child: parent for parent in root.iter() for child in parent} for i, joint in enumerate(mujoco_hinge_joints): body = parent_map[joint] if "quat" in body.attrib: rot = Rotation.from_quat( [float(x) for x in body.get("quat").strip().split(" ")], scalar_first=True, ) idx = self._mujoco_indices_to_kimodo_indices[i] self._rot_offsets_q2t[idx] = torch.from_numpy(rot.as_matrix()) rot = mujoco_to_kimodo * rot * mujoco_to_kimodo.inv() self._rot_offsets_f2q[idx] = torch.from_numpy(rot.as_matrix().T) # Hinge axis in f2q space so extraction uses the same frame as joint_rot_f2q. # Then extract(offset) gives the angle s.t. axis_angle(angle * axis_f2q) = offset, and # reconstruction R_local = offset.T @ axis_angle(angle * axis_f2q) = I when input is identity. axis_kimodo = self._mujoco_joint_axis_values_kimodo_space self._mujoco_joint_axis_values_f2q_space = torch.zeros_like(axis_kimodo) for i in range(len(mujoco_hinge_joints)): j = self._mujoco_indices_to_kimodo_indices[i].item() axis_f2q = torch.mv(self._rot_offsets_f2q[j], axis_kimodo[i]) n = axis_f2q.norm() if n > 1e-8: axis_f2q = axis_f2q / n self._mujoco_joint_axis_values_f2q_space[i] = axis_f2q # Rest-pose DOFs: angle we extract when R_local = I (t-pose). MuJoCo limits are # relative to joint zero (rest pose), so we must clamp in MuJoCo space: convert # joint_dofs to mujoco_angle = joint_dofs - rest_dofs, clamp, then back. rest_rot_f2q = self._rot_offsets_f2q[self._mujoco_indices_to_kimodo_indices] rest_rot_f2q = rest_rot_f2q.unsqueeze(0).unsqueeze(0) self._rest_dofs = self._local_rots_f2q_to_joint_dofs(rest_rot_f2q).squeeze(0).squeeze(0) # Axis-angle rest DOFs: angle s.t. axis_angle(angle * axis_f2q) = offset. Used in # project_to_real_robot_rotations so extract+reconstruct round-trip and t-pose is preserved. rest_rot_f2q_flat = self._rot_offsets_f2q[self._mujoco_indices_to_kimodo_indices] full_aa = matrix_to_axis_angle(rest_rot_f2q_flat) self._rest_dofs_axis_angle = (full_aa * self._mujoco_joint_axis_values_f2q_space).sum(dim=-1) def dict_to_qpos( self, output: dict, device: Optional[str] = None, root_quat_w_first: bool = True, numpy: bool = True, mujoco_rest_zero: bool = False, ): """Convert kimodo output dict to mujoco qpos format. Args: output: dict with keys "local_rot_mats" and "root_positions". device: device to use for the output. root_quat_w_first: If True, quaternion in qpos is (w,x,y,z). numpy: If True, convert the output to numpy array. mujoco_rest_zero: If True, joint angles are written so that kimodo rest (t-pose) maps to q=0 in MuJoCo. If False, write raw joint_dofs. Returns: qpos: (B, T, 7+J) mujoco qpos format. """ local_rot_mats = to_torch(output["local_rot_mats"], device) root_positions = to_torch(output["root_positions"], device) qpos = self.to_qpos( local_rot_mats, root_positions, root_quat_w_first=root_quat_w_first, mujoco_rest_zero=mujoco_rest_zero, ) if numpy: qpos = to_numpy(qpos) return qpos def qpos_to_motion_dict( self, qpos: torch.Tensor | np.ndarray, source_fps: float, *, root_quat_w_first: bool = True, mujoco_rest_zero: bool = False, ): """Inverse of :meth:`to_qpos` / :meth:`dict_to_qpos` for MuJoCo CSV ``(T, 36)`` rows. Args: qpos: Shape ``(T, 36)`` or ``(1, T, 36)`` (root xyz, root quat wxyz, 29 joint angles). source_fps: Source frame rate (Hz) of the qpos data. root_quat_w_first: Must match how the CSV was written (default ``True``). mujoco_rest_zero: Must match :meth:`dict_to_qpos` / :meth:`to_qpos`. 
Returns: Kimodo motion dict (see :func:`kimodo.exports.motion_io.complete_motion_dict`). """ from kimodo.exports.motion_io import complete_motion_dict qpos = to_torch(qpos, None) if qpos.dim() == 2: qpos = qpos.unsqueeze(0) device = qpos.device dtype = qpos.dtype batch_size, num_frames, ncols = qpos.shape if ncols != 36: raise ValueError(f"Expected qpos last dim 36; got {ncols}") kimodo_to_mujoco_matrix = self.kimodo_to_mujoco_matrix.to(device=device, dtype=dtype) mujoco_to_kimodo_matrix = kimodo_to_mujoco_matrix.T root_mujoco = qpos[..., :3] root_positions = torch.matmul(mujoco_to_kimodo_matrix[None, None, ...], root_mujoco[..., None]).squeeze(-1) quat = qpos[..., 3:7] if root_quat_w_first: root_rot_mujoco = quaternion_to_matrix(quat) else: quat_wxyz = quat[..., [3, 0, 1, 2]] root_rot_mujoco = quaternion_to_matrix(quat_wxyz) O0 = self._rot_offsets_f2q[0].to(device=device, dtype=dtype) # root_rot_mujoco is (..., 3, 3) after optional batch unsqueeze (e.g. (1, T, 3, 3)). # Use ``...il`` so ``k`` sums with ``kl``; ``...ik`` incorrectly keeps ``k`` in the output. R_f2q_root = torch.einsum( "ij,...jk,kl->...il", mujoco_to_kimodo_matrix, root_rot_mujoco, kimodo_to_mujoco_matrix, ) R_kimodo_root = torch.einsum("ij,...jk->...ik", O0.T, R_f2q_root) joint_dofs = qpos[..., 7:] if mujoco_rest_zero: rest_dofs = self._rest_dofs.to(device=device, dtype=dtype) angles = joint_dofs + rest_dofs[None, None, :] use_relative = True else: angles = joint_dofs use_relative = False nb_joints = self.skeleton.nbjoints template = torch.eye(3, device=device, dtype=dtype).expand(batch_size, num_frames, nb_joints, 3, 3).contiguous() template[:, :, 0] = R_kimodo_root local_rot_mats = self._joint_dofs_to_local_rot_mats( angles, template, device, dtype, use_relative=use_relative, ) if batch_size != 1: raise ValueError(f"Only a single clip is supported; got batch_size={batch_size}") return complete_motion_dict(local_rot_mats[0], root_positions[0], self.skeleton, source_fps) def save_csv(self, qpos: torch.Tensor | np.ndarray, csv_path): """Save qpos rows to CSV; a (T, 36) array writes a single file, a (B, T, 36) batch writes one file per clip with a _NN suffix.""" qpos = to_numpy(qpos) shape = qpos.shape if len(shape) == 2: # only one motion: save it np.savetxt(csv_path, qpos, delimiter=",") if len(shape) == 3: # batch of motions if shape[0] == 1: # if only one motion, just save it np.savetxt(csv_path, qpos[0], delimiter=",") else: csv_path_base, ext = os.path.splitext(csv_path) for i in range(shape[0]): self.save_csv(qpos[i], csv_path_base + "_" + str(i).zfill(2) + ext) def _local_rots_to_joint_dofs( self, local_rot_mats: torch.Tensor, axis_vals: torch.Tensor, ) -> torch.Tensor: """Extract per-joint single-DoF angles (radians) via Euler projection (for to_qpos/f2q).""" x_joint_dof = torch.atan2(local_rot_mats[..., 2, 1], local_rot_mats[..., 2, 2]) y_joint_dof = torch.atan2(local_rot_mats[..., 0, 2], local_rot_mats[..., 0, 0]) z_joint_dof = torch.atan2(local_rot_mats[..., 1, 0], local_rot_mats[..., 1, 1]) xyz_joint_dofs = torch.stack([x_joint_dof, y_joint_dof, z_joint_dof], dim=-1) axis_vals = axis_vals.to(device=local_rot_mats.device, dtype=local_rot_mats.dtype) joint_dofs = (xyz_joint_dofs * axis_vals[None, None, :, :]).sum(dim=-1) return joint_dofs def _local_rots_to_joint_dofs_axis_angle( self, local_rot_mats: torch.Tensor, axis_vals: torch.Tensor, ) -> torch.Tensor: """Extract per-joint single-DoF angles (radians) via axis-angle; round-trips with axis_angle_to_matrix. Args: local_rot_mats: (..., num_hinges, 3, 3) in same frame as axis_vals. axis_vals: (num_hinges, 3) unit axis per hinge.
Returns: joint_dofs: (..., num_hinges) signed angle = dot(axis_angle(R), axis). """ axis_vals = axis_vals.to(device=local_rot_mats.device, dtype=local_rot_mats.dtype) full_aa = matrix_to_axis_angle(local_rot_mats) joint_dofs = (full_aa * axis_vals).sum(dim=-1) return joint_dofs def _local_rots_f2q_to_joint_dofs(self, local_rot_mats_f2q: torch.Tensor) -> torch.Tensor: """Extract per-joint single-DoF angles from local rotations in f2q space (for to_qpos).""" axis_vals = self._mujoco_joint_axis_values_f2q_space return self._local_rots_to_joint_dofs(local_rot_mats_f2q, axis_vals) def _clamp_to_limits(self, joint_dofs: torch.Tensor) -> torch.Tensor: """Clamp joint angles to XML limits (radians). Angles are in kimodo convention (0 = rest). """ device = joint_dofs.device lo = self._joint_limits_min.to(device=device, dtype=joint_dofs.dtype) hi = self._joint_limits_max.to(device=device, dtype=joint_dofs.dtype) return torch.clamp(joint_dofs, lo[None, None, :], hi[None, None, :]) def _clamp_joint_dofs(self, joint_dofs: torch.Tensor, rest_dofs: torch.Tensor) -> torch.Tensor: """Clamp joint angles to MuJoCo limits (radians), with rest_dofs conversion.""" device = joint_dofs.device rest_dofs = rest_dofs.to(device=device, dtype=joint_dofs.dtype) mujoco_dofs = joint_dofs - rest_dofs[None, None, :] lo = self._joint_limits_min.to(device=device, dtype=joint_dofs.dtype) hi = self._joint_limits_max.to(device=device, dtype=joint_dofs.dtype) mujoco_dofs = torch.clamp(mujoco_dofs, lo[None, None, :], hi[None, None, :]) return mujoco_dofs + rest_dofs[None, None, :] def _joint_dofs_to_local_rot_mats( self, joint_dofs: torch.Tensor, original_local_rot_mats: torch.Tensor, device: torch.device, dtype: torch.dtype, use_relative: bool = False, ) -> torch.Tensor: """Reconstruct full local rotation matrices from 1-DoF angles.""" out = original_local_rot_mats.clone() axis_kimodo = self._mujoco_joint_axis_values_kimodo_space.to(device=device, dtype=dtype) for i in range(joint_dofs.shape[-1]): j = self._mujoco_indices_to_kimodo_indices[i].item() angle = joint_dofs[..., i] axis = axis_kimodo[i] if use_relative: axis_angle = angle[..., None] * axis[None, None, :] R_local = axis_angle_to_matrix(axis_angle) else: rot_offsets_f2q = self._rot_offsets_f2q.to(device=device, dtype=dtype) axis_in_f2q = torch.mv(rot_offsets_f2q[j], axis) axis_angle = angle[..., None] * axis_in_f2q[None, None, :] R_f2q = axis_angle_to_matrix(axis_angle) R_local = torch.einsum("ij,btjk->btik", rot_offsets_f2q[j].T, R_f2q) out[:, :, j, :, :] = R_local return out @ensure_batched(local_rot_mats=5, root_positions=3, lengths=1) def project_to_real_robot_rotations( self, local_rot_mats: torch.Tensor, root_positions: torch.Tensor, clamp_to_limits: bool = True, mujoco_rest_zero: bool = False, ) -> dict: """Project full 3D local rotations to G1 real robot DoF and back to 3D for viz. Joint angles are extracted along each hinge axis, optionally clamped to XML limits, then reconstructed to 3D rotations. When mujoco_rest_zero=False (default), raw angles are used (baked-with-quat). When True, angles are relative to rest (0 = T-pose in MuJoCo). """ device = local_rot_mats.device dtype = local_rot_mats.dtype # Transform to f2q frame and extract 1-DoF angles (axis-angle projection). 
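# Per hinge i (kimodo joint j), R_f2q = rot_offsets_f2q[j] @ R_local; the signed angle is the component of axis_angle(R_f2q) along the hinge axis, and any off-axis rotation is discarded. This is the lossy 3-DoF -> 1-DoF projection step.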
local_rot_f2q = torch.matmul(self._rot_offsets_f2q.to(device=device, dtype=dtype), local_rot_mats) hinge_rots = local_rot_f2q[:, :, self._mujoco_indices_to_kimodo_indices, :, :] axis_f2q = self._mujoco_joint_axis_values_f2q_space.to(device=device, dtype=dtype) joint_dofs = self._local_rots_to_joint_dofs_axis_angle(hinge_rots, axis_f2q) # Optionally express angles relative to rest (MuJoCo q=0 at T-pose). if mujoco_rest_zero: rest_dofs = self._rest_dofs_axis_angle.to(device=device, dtype=dtype) angles = joint_dofs - rest_dofs[None, None, :] use_relative = True else: angles = joint_dofs use_relative = False if clamp_to_limits: if mujoco_rest_zero: angles = self._clamp_to_limits(angles) else: rest_dofs_aa = self._rest_dofs_axis_angle.to(device=device, dtype=dtype) angles = self._clamp_joint_dofs(angles, rest_dofs_aa) # Reconstruct 3D local rotations from 1-DoF angles and run FK. local_rot_mats_proj = self._joint_dofs_to_local_rot_mats( angles, local_rot_mats, device, dtype, use_relative=use_relative ) global_rot_mats, posed_joints, _ = self.skeleton.fk(local_rot_mats_proj, root_positions) return { "local_rot_mats": local_rot_mats_proj, "global_rot_mats": global_rot_mats, "posed_joints": posed_joints, "root_positions": root_positions, } @ensure_batched(local_rot_mats=5, root_positions=3, lengths=1) def to_qpos( self, local_rot_mats: torch.Tensor, root_positions: torch.Tensor, root_quat_w_first: bool = True, mujoco_rest_zero: bool = False, ) -> torch.Tensor: """Fast batch conversion from kimodo features to mujoco qpos format. Args: local_rot_mats: (B, T, J, 3, 3) local rotation matrices (kimodo convention). root_positions: (B, T, 3) root positions. root_quat_w_first: If True, quaternion in qpos is (w,x,y,z). mujoco_rest_zero: If True, joint angles are written so that kimodo rest (t-pose) maps to q=0 in MuJoCo. If False, write raw joint_dofs. Returns: torch.Tensor of shape [batch, numFrames, 36] containing mujoco qpos data: - root_trans (3) + root_quat (4) + joint_dofs (29) = 36 columns """ batch_size, num_frames, nb_joints = local_rot_mats.shape[:3] device, dtype = local_rot_mats.device, local_rot_mats.dtype local_rot_mats = torch.matmul(self._rot_offsets_f2q.to(device), local_rot_mats) batch_size, num_frames = root_positions.shape[0], root_positions.shape[1] # Move precomputed matrices to the same device/dtype kimodo_to_mujoco_matrix = self.kimodo_to_mujoco_matrix.to(device=device, dtype=dtype) # Initialize output tensor: [batch, numFrames, 36] qpos = torch.zeros((batch_size, num_frames, 36), dtype=dtype, device=device) # Convert root translation: apply coordinate transformation root_positions_mujoco = torch.matmul(kimodo_to_mujoco_matrix[None, None, ...], root_positions[..., None]) qpos[:, :, :3] = root_positions_mujoco.view(batch_size, num_frames, 3) # Convert root rotation: apply coordinate transformation to rotation matrix root_rot = local_rot_mats[:, :, 0, :] # [batch, numFrames, 3, 3] # Apply coordinate transformation: R_mujoco = kimodo_to_mujoco * R_kimodo * kimodo_to_mujoco^T mujoco_to_kimodo_matrix = kimodo_to_mujoco_matrix.T root_rot_mujoco = torch.matmul( torch.matmul(kimodo_to_mujoco_matrix[None, None, ...], root_rot), mujoco_to_kimodo_matrix[None, None, ...], ) root_rot_quat = matrix_to_quaternion(root_rot_mujoco) # [w, x, y, z] if root_quat_w_first: qpos[:, :, 3:7] = root_rot_quat[:, :, [0, 1, 2, 3]] # [w, x, y, z] else: qpos[:, :, 3:7] = root_rot_quat[:, :, [1, 2, 3, 0]] # [w, x, y, z] -> [x, y, z, w] # Joint DOFs: raw angles or relative to rest (rest = q=0 in MuJoCo). 
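# Note: local_rot_mats was already premultiplied by _rot_offsets_f2q at the top of this method, so the hinge rotations selected below are in the f2q frame expected by _local_rots_f2q_to_joint_dofs.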
joint_rot_f2q = local_rot_mats[:, :, self._mujoco_indices_to_kimodo_indices, :, :] joint_dofs = self._local_rots_f2q_to_joint_dofs(joint_rot_f2q) if mujoco_rest_zero: rest_dofs = self._rest_dofs.to(device=device, dtype=dtype) qpos[:, :, 7:] = joint_dofs - rest_dofs[None, None, :] else: qpos[:, :, 7:] = joint_dofs return qpos def apply_g1_real_robot_projection( skeleton: G1Skeleton34, joints_pos: torch.Tensor, joints_rot: torch.Tensor, clamp_to_limits: bool = True, ) -> tuple[torch.Tensor, torch.Tensor]: """Project G1 motion to real robot DoF (1-DoF per joint) with optional axis limits. Extracts a single angle per hinge along its axis (1-DoF), optionally clamps to joint limits from the MuJoCo XML (when clamp_to_limits=True), then reconstructs 3D rotations and runs FK. T-pose (identity local rotations) is preserved. Args: skeleton: G1 skeleton instance. joints_pos: (T, J, 3) or (B, T, J, 3) joint positions in global space. joints_rot: (T, J, 3, 3) or (B, T, J, 3, 3) global rotation matrices. clamp_to_limits: If True, clamp joint angles to XML axis limits (default True). Returns: (posed_joints, global_rot_mats) as tensors, same shape as inputs (batch preserved). """ local_rot_mats = global_rots_to_local_rots(joints_rot, skeleton) root_positions = joints_pos[..., skeleton.root_idx, :] # Converter expects batch dim (B, T, ...); add and remove if single sequence. single_sequence = local_rot_mats.dim() == 4 if single_sequence: local_rot_mats = local_rot_mats.unsqueeze(0) root_positions = root_positions.unsqueeze(0) converter = MujocoQposConverter(skeleton) projected = converter.project_to_real_robot_rotations( local_rot_mats, root_positions, clamp_to_limits=clamp_to_limits ) out_pos = projected["posed_joints"] out_rot = projected["global_rot_mats"] if single_sequence: out_pos = out_pos.squeeze(0) out_rot = out_rot.squeeze(0) return out_pos, out_rot ================================================ FILE: kimodo/exports/smplx.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Convert kimodo motion to AMASS/SMPL-X compatible parameters (axis-angle, Y-up or Z-up).""" import os from typing import Optional import einops import numpy as np import torch from kimodo.assets import skeleton_asset_path from kimodo.geometry import axis_angle_to_matrix, matrix_to_axis_angle from kimodo.tools import ensure_batched, to_numpy, to_torch def kimodo_y_up_to_amass_coord_rotation_matrix() -> np.ndarray: """3x3 rotation mapping Kimodo Y-up (+Z forward) to AMASS Z-up (+Y forward). Used by :func:`get_amass_parameters` and :func:`amass_arrays_to_kimodo_motion` (inverse). """ y_up_to_z_up = np.array( [ [1.0, 0.0, 0.0], [0.0, 0.0, -1.0], [0.0, 1.0, 0.0], ], dtype=np.float32, ) rot_z_180 = np.array( [ [-1.0, 0.0, 0.0], [0.0, -1.0, 0.0], [0.0, 0.0, 1.0], ], dtype=np.float32, ) return np.matmul(rot_z_180, y_up_to_z_up).astype(np.float32) @ensure_batched(local_rot_mats=5, root_positions=3, lengths=1) def get_amass_parameters( local_rot_mats, root_positions, skeleton, z_up=True, ): """Convert local rot mats and root positions to AMASS-style trans and pose_body; optional z_up coordinate transform. Our method generates motions with Y-up and +Z forward; if z_up=True, transform to Z-up and +Y forward as in AMASS. 
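Returns: (trans, root_orient, pose_body): ``(B, T, 3)`` root translation, ``(B, T, 3)`` axis-angle root orientation, and ``(B, T, 63)`` flattened body pose (21 joints x 3).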
""" # Our method generate motions with Y-up and +Z forward # if z_up = True, we transform this to: Z-up with +Y forward, as in AMASS # Remove the root offset; SMPL-X FK adds pelvis offset back. pelvis_offset = skeleton.neutral_joints[skeleton.root_idx].cpu().numpy() trans = root_positions - pelvis_offset root_rot_mats = to_numpy(local_rot_mats[:, :, 0]) local_rot_axis_angle = to_numpy(matrix_to_axis_angle(to_torch(local_rot_mats))) pose_body = einops.rearrange(local_rot_axis_angle[:, :, 1:], "b t j d -> b t (j d)") # Optionally convert from Y-up to Z-up coordinates. if z_up: y_up_to_z_up = kimodo_y_up_to_amass_coord_rotation_matrix() root_rot_mats = np.matmul(y_up_to_z_up, root_rot_mats) trans = np.matmul(trans + pelvis_offset, y_up_to_z_up.T) - pelvis_offset root_orient = to_numpy(matrix_to_axis_angle(to_torch(root_rot_mats))) return trans, root_orient, pose_body def amass_arrays_to_kimodo_motion( trans: np.ndarray, root_orient: np.ndarray, pose_body: np.ndarray, skeleton, source_fps: float, *, z_up: bool = True, ): """Inverse of :func:`get_amass_parameters` for a single sequence (AMASS → Kimodo motion dict). Args: trans: ``(T, 3)`` AMASS root translation (same as ``trans`` in AMASS NPZ). root_orient: ``(T, 3)`` axis-angle root orientation in AMASS coordinates (z-up when ``z_up``). pose_body: ``(T, 63)`` body pose axis-angle (21 joints × 3). skeleton: :class:`~kimodo.skeleton.definitions.SMPLXSkeleton22` instance. source_fps: Source frame rate (Hz) of the AMASS recording. z_up: If ``True``, invert the same Y-up↔Z-up transform as ``get_amass_parameters(..., z_up=True)``. Returns: Motion dict compatible with :func:`kimodo.exports.motion_io.save_kimodo_npz`. """ from kimodo.exports.motion_io import complete_motion_dict trans = np.asarray(trans, dtype=np.float32) root_orient = np.asarray(root_orient, dtype=np.float32) pose_body = np.asarray(pose_body, dtype=np.float32) if trans.ndim != 2 or trans.shape[-1] != 3: raise ValueError(f"trans must be (T, 3); got {trans.shape}") if root_orient.shape != trans.shape: raise ValueError(f"root_orient shape {root_orient.shape} must match trans {trans.shape}") t = trans.shape[0] if pose_body.shape != (t, 63): raise ValueError(f"pose_body must be (T, 63); got {pose_body.shape}") pelvis_offset = skeleton.neutral_joints[skeleton.root_idx].detach().cpu().numpy().astype(np.float32) device = skeleton.neutral_joints.device dtype = torch.float32 Y_np = kimodo_y_up_to_amass_coord_rotation_matrix() if z_up: y_up_to_z_up = torch.from_numpy(Y_np).to(device=device, dtype=dtype) # trans_amass = root_kimodo @ Y.T - pelvis_offset => root_kimodo = (trans_amass + pelvis_offset) @ Y root_positions_np = (trans + pelvis_offset) @ Y_np else: root_positions_np = trans + pelvis_offset root_positions = torch.from_numpy(root_positions_np).to(device=device, dtype=dtype) R_amass_root = axis_angle_to_matrix(torch.from_numpy(root_orient).to(device=device, dtype=dtype)) if z_up: R_kimodo_root = torch.einsum("ij,tjk->tik", y_up_to_z_up.T, R_amass_root) else: R_kimodo_root = R_amass_root nb = skeleton.nbjoints if nb != 22: raise ValueError(f"Expected SMPL-X body skeleton with 22 joints; got {nb}") local_rot_mats = torch.zeros((t, nb, 3, 3), device=device, dtype=dtype) local_rot_mats[:, 0] = R_kimodo_root pose_aa = torch.from_numpy(pose_body.reshape(t, 21, 3)).to(device=device, dtype=dtype) local_rot_mats[:, 1:] = axis_angle_to_matrix(pose_aa.reshape(-1, 3)).reshape(t, 21, 3, 3) return complete_motion_dict(local_rot_mats, root_positions, skeleton, source_fps) def 
amass_npz_to_kimodo_motion(npz_path: str, skeleton, source_fps: Optional[float] = None, *, z_up: bool = True): """Load an AMASS-style ``.npz`` and return a Kimodo motion dict. Args: npz_path: Path to AMASS NPZ (``trans``, ``root_orient``, ``pose_body``, ...). skeleton: SMPL-X skeleton instance. source_fps: Source frame rate (Hz); if ``None``, uses ``mocap_frame_rate`` from the file when present, else ``30.0``. z_up: Same meaning as :func:`amass_arrays_to_kimodo_motion`. """ with np.load(npz_path, allow_pickle=True) as data: trans = np.asarray(data["trans"], dtype=np.float32) root_orient = np.asarray(data["root_orient"], dtype=np.float32) pose_body = np.asarray(data["pose_body"], dtype=np.float32) if source_fps is None: source_fps = float(data["mocap_frame_rate"]) if "mocap_frame_rate" in data.files else 30.0 return amass_arrays_to_kimodo_motion(trans, root_orient, pose_body, skeleton, source_fps, z_up=z_up) class AMASSConverter: def __init__( self, fps, skeleton, beta_path=str(skeleton_asset_path("smplx22", "beta.npy")), mean_hands_path=str(skeleton_asset_path("smplx22", "mean_hands.npy")), ): self.fps = fps self.skeleton = skeleton # Load betas if os.path.exists(beta_path): # only use first 16 betas to match AMASS betas = np.load(beta_path)[:16] else: betas = np.zeros(16) # Load mean hands if os.path.exists(mean_hands_path): mean_hands = np.load(mean_hands_path) else: mean_hands = np.zeros(90) self.default_frame_params = { "pose_jaw": np.zeros(3), "pose_eye": np.zeros(6), "pose_hand": mean_hands, } self.output_dict_base = { "gender": "neutral", "surface_model_type": "smplx", "betas": betas, "num_betas": len(betas), "mocap_frame_rate": float(fps), } def convert_save_npz(self, output: dict, npz_path, z_up=True): trans, root_orient, pose_body = get_amass_parameters( output["local_rot_mats"], output["root_positions"], self.skeleton, z_up=z_up, ) nb_frames = trans.shape[-2] amass_output_base = self.output_dict_base.copy() for key, val in self.default_frame_params.items(): amass_output_base[key] = einops.repeat(val, "d -> t d", t=nb_frames) amass_output_base["mocap_time_length"] = nb_frames / self.fps self.save_npz(trans, root_orient, pose_body, amass_output_base, npz_path) def save_npz(self, trans, root_orient, pose_body, base_output, npz_path): shape = trans.shape if len(shape) == 3 and shape[0] == 1: # if only one motion, squeeze the data trans = trans[0] root_orient = root_orient[0] pose_body = pose_body[0] shape = trans.shape if len(shape) == 2: amass_output = { "trans": trans, "root_orient": root_orient, "pose_body": pose_body, } | base_output np.savez(npz_path, **amass_output) elif len(shape) == 3: # real batch of motions npz_path_base, ext = os.path.splitext(npz_path) for i in range(shape[0]): npz_path_i = npz_path_base + "_" + str(i).zfill(2) + ext self.save_npz(trans[i], root_orient[i], pose_body[i], base_output, npz_path_i) # amass_output = { # "gender": "neutral", # "surface_model_type": "smplx", # "mocap_frame_rate": float(fps), # "mocap_time_length": len(motion) / float(fps) # "trans": trans, # "betas": betas, # "num_betas": len(betas), # "root_orient": np.array([T, 3]), # axis angle # "pose_body": np.array([T, 63]), # 63=21*3, axis angle 21 = 22 - root # "pose_hand": np.array([T, 90]), # 90=30*3=15*2*3 axis angle (load from mean_hands) # "pose_jaw": np.array([T, 3]), # all zeros is fine # "pose_eye": np.array([T, 6]), # all zeros is fine` # } ================================================ FILE: kimodo/geometry.py ================================================ # 
SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Rotation and representation conversions: axis-angle, quaternion, matrix, 6D continuous.""" import torch import torch.nn.functional as F def angle_to_Y_rotation_matrix(angle: torch.Tensor) -> torch.Tensor: """Build a rotation matrix around the Y axis from a scalar angle (radians). Shape: angle.shape + (3, 3). """ cos, sin = torch.cos(angle), torch.sin(angle) one, zero = torch.ones_like(angle), torch.zeros_like(angle) mat = torch.stack((cos, zero, sin, zero, one, zero, -sin, zero, cos), -1) mat = mat.reshape(angle.shape + (3, 3)) return mat def matrix_to_cont6d(matrix: torch.Tensor) -> torch.Tensor: """Convert rotation matrix to 6D continuous representation (first two columns). Shape: (..., 3, 3) -> (..., 6). """ cont_6d = torch.concat([matrix[..., 0], matrix[..., 1]], dim=-1) return cont_6d def cont6d_to_matrix(cont6d: torch.Tensor) -> torch.Tensor: """Convert 6D continuous representation to rotation matrix (Gram–Schmidt on two columns). Last dim must be 6. """ assert cont6d.shape[-1] == 6, "The last dimension must be 6" x_raw = cont6d[..., 0:3] y_raw = cont6d[..., 3:6] x = x_raw / torch.norm(x_raw, dim=-1, keepdim=True) z = torch.cross(x, y_raw, dim=-1) z = z / torch.norm(z, dim=-1, keepdim=True) y = torch.cross(z, x, dim=-1) x = x[..., None] y = y[..., None] z = z[..., None] mat = torch.cat([x, y, z], dim=-1) return mat def axis_angle_to_matrix(axis_angle: torch.Tensor) -> torch.Tensor: """Convert axis-angle to rotation matrix. Args: axis_angle: (..., 3) axis-angle vectors (angle = norm, axis = normalized) Returns: rotmat: (..., 3, 3) rotation matrices """ eps = 1e-6 angle = torch.norm(axis_angle, dim=-1, keepdim=True) # (..., 1) axis = axis_angle / (angle + eps) x, y, z = axis.unbind(-1) zero = torch.zeros_like(x) K = torch.stack([zero, -z, y, z, zero, -x, -y, x, zero], dim=-1).reshape(*axis.shape[:-1], 3, 3) eye = torch.eye(3, device=axis.device, dtype=axis.dtype) eye = eye.expand(*axis.shape[:-1], 3, 3) sin = torch.sin(angle)[..., None] cos = torch.cos(angle)[..., None] R = eye + sin * K + (1 - cos) * (K @ K) return R def matrix_to_axis_angle(R: torch.Tensor) -> torch.Tensor: """Convert rotation matrix to axis-angle via quaternions (more numerically stable). Args: R: (..., 3, 3) rotation matrices Returns: axis_angle: (..., 3) """ # Go through quaternions for numerical stability quat = matrix_to_quaternion(R) # (..., 4) with (w, x, y, z) return quaternion_to_axis_angle(quat) def quaternion_to_axis_angle(quat: torch.Tensor) -> torch.Tensor: """Convert quaternion to axis-angle representation. Args: quat: (..., 4) quaternions with real part first (w, x, y, z) Returns: axis_angle: (..., 3) """ eps = 1e-6 # Ensure canonical form to avoid sign ambiguity. # Primary: prefer w > 0. When w ≈ 0 (angle ≈ π), prefer first nonzero xyz > 0. 
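# q and -q encode the same rotation; without a canonical sign choice the returned axis-angle could flip direction between nearly identical inputs.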
w = quat[..., 0:1] xyz = quat[..., 1:] # Find first significant component of xyz for tie-breaking when w ≈ 0 first_significant = xyz[..., 0:1] # use x component as tie-breaker # Flip if: w < 0, OR (w ≈ 0 AND first xyz component < 0) should_flip = (w < -eps) | ((w.abs() <= eps) & (first_significant < 0)) quat = torch.where(should_flip, -quat, quat) w = quat[..., 0] xyz = quat[..., 1:] # sin(angle/2) = ||xyz|| sin_half_angle = xyz.norm(dim=-1) # angle = 2 * atan2(sin(angle/2), cos(angle/2)) # This is more stable than 2 * acos(w) near angle=0 angle = 2.0 * torch.atan2(sin_half_angle, w) # axis = xyz / sin(angle/2), but handle small angles # For small angles: axis-angle ≈ 2 * xyz (since sin(x) ≈ x for small x) small_angle = sin_half_angle.abs() < eps # Safe division scale = torch.where( small_angle, 2.0 * torch.ones_like(angle), # small angle: axis_angle ≈ 2 * xyz angle / sin_half_angle.clamp(min=eps), ) return xyz * scale.unsqueeze(-1) def _sqrt_positive_part(x: torch.Tensor) -> torch.Tensor: """Returns torch.sqrt(torch.max(0, x)) subgradient is zero where x is 0.""" return torch.sqrt(x * (x > 0).to(x.dtype)) def matrix_to_quaternion(matrix: torch.Tensor) -> torch.Tensor: """Convert rotations given as rotation matrices to quaternions. Args: matrix: Rotation matrices as tensor of shape (..., 3, 3). Returns: quaternions with real part first, as tensor of shape (..., 4). """ if matrix.size(-1) != 3 or matrix.size(-2) != 3: raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.") batch_dim = matrix.shape[:-2] m00, m01, m02, m10, m11, m12, m20, m21, m22 = torch.unbind(matrix.reshape(batch_dim + (9,)), dim=-1) q_abs = _sqrt_positive_part( torch.stack( [ 1.0 + m00 + m11 + m22, 1.0 + m00 - m11 - m22, 1.0 - m00 + m11 - m22, 1.0 - m00 - m11 + m22, ], dim=-1, ) ) quat_by_rijk = torch.stack( [ torch.stack([q_abs[..., 0] ** 2, m21 - m12, m02 - m20, m10 - m01], dim=-1), torch.stack([m21 - m12, q_abs[..., 1] ** 2, m10 + m01, m02 + m20], dim=-1), torch.stack([m02 - m20, m10 + m01, q_abs[..., 2] ** 2, m12 + m21], dim=-1), torch.stack([m10 - m01, m20 + m02, m21 + m12, q_abs[..., 3] ** 2], dim=-1), ], dim=-2, ) flr = torch.tensor(0.1).to(dtype=q_abs.dtype, device=q_abs.device) quat_candidates = quat_by_rijk / (2.0 * q_abs[..., None].max(flr)) return ( (F.one_hot(q_abs.argmax(dim=-1), num_classes=4)[..., None] * quat_candidates) .sum(dim=-2) .reshape(batch_dim + (4,)) ) def quaternion_to_matrix(quaternions: torch.Tensor) -> torch.Tensor: """Convert rotations given as quaternions to rotation matrices. Args: quaternions: quaternions with real part first, as tensor of shape (..., 4). Returns: Rotation matrices as tensor of shape (..., 3, 3). """ r, i, j, k = torch.unbind(quaternions, -1) two_s = 2.0 / (quaternions * quaternions).sum(-1) o = torch.stack( ( 1 - two_s * (j * j + k * k), two_s * (i * j - k * r), two_s * (i * k + j * r), two_s * (i * j + k * r), 1 - two_s * (i * i + k * k), two_s * (j * k - i * r), two_s * (i * k - j * r), two_s * (j * k + i * r), 1 - two_s * (i * i + j * j), ), -1, ) return o.reshape(quaternions.shape[:-1] + (3, 3)) ================================================ FILE: kimodo/meta.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Parse and normalize prompt text/duration data from meta dicts.""" import os from typing import Any, Optional from kimodo.tools import load_json from .sanitize import sanitize_text, sanitize_texts def load_prompts_from_meta(meta_path: str, **kwargs): """Load prompts from a meta dict or file. If fps is provided, the durations are converted to frames. Args: meta_path: Path to the meta file. **kwargs: Additional arguments to pass to parse_prompts_from_meta. Returns: texts: List of texts. durations: List of durations in seconds or frames. """ if not os.path.exists(meta_path): raise FileNotFoundError(f"meta.json not found in input folder: {meta_path}") meta = load_json(meta_path) return parse_prompts_from_meta(meta, **kwargs) def parse_prompts_from_meta( meta: dict[str, Any], fps: Optional[float] = None, sanitize: bool = False, ) -> tuple[list[str], list[float]]: """Parse prompt texts and durations from a meta dict into normalized lists. If fps is provided, the durations are converted to frames. Accepts either: - Single prompt: "text" (str) and "duration" (float) in seconds. - Multiple prompts: "texts" (list of str) and "durations" (list of float) in seconds. Returns: (texts, durations): texts as list of str, durations as list of float (seconds or frames). Lengths of both lists are equal. Raises: ValueError: If meta does not contain a recognized format. """ # Single prompt if "text" in meta and "duration" in meta: text = meta["text"] duration = float(meta["duration"]) if fps is not None: duration = int(duration * fps) if isinstance(text, list): raise ValueError("meta has 'text' but it is a list; use 'texts' for multiple prompts") if sanitize: text = sanitize_text(text) return ([text], [duration]) # Multiple prompts if "texts" in meta and "durations" in meta: texts = meta["texts"] durations = meta["durations"] if not isinstance(texts, list) or not isinstance(durations, list): raise ValueError("meta 'texts' and 'durations' must be lists") if len(texts) != len(durations): raise ValueError(f"meta 'texts' and 'durations' length mismatch: {len(texts)} vs {len(durations)}") durations = [float(d) for d in durations] if fps is not None: durations = [int(d * fps) for d in durations] if sanitize: texts = sanitize_texts(texts) return texts, durations raise ValueError("meta must contain either 'text' and 'duration', or 'texts' and 'durations'.") ================================================ FILE: kimodo/metrics/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Evaluation metrics for motion quality (foot skate, contact consistency, constraint following).""" from .base import ( Metric, aggregate_metrics, clear_metrics, compute_metrics, ) from .constraints import ContraintFollow from .foot_skate import ( FootContactConsistency, FootSkateFromContacts, FootSkateFromHeight, FootSkateRatio, ) from .tmr import ( TMR_EmbeddingMetric, TMR_Metric, compute_tmr_per_sample_retrieval, compute_tmr_retrieval_metrics, ) __all__ = [ "Metric", "ContraintFollow", "FootContactConsistency", "FootSkateFromContacts", "FootSkateFromHeight", "FootSkateRatio", "TMR_EmbeddingMetric", "TMR_Metric", "aggregate_metrics", "clear_metrics", "compute_metrics", "compute_tmr_per_sample_retrieval", "compute_tmr_retrieval_metrics", ] ================================================ FILE: kimodo/metrics/base.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Base metric class and batch/aggregate helpers.""" from __future__ import annotations from collections import defaultdict from typing import Dict, List import torch class Metric: """Base class for metrics that accumulate results over multiple __call__ and expose aggregate().""" def __init__(self, **kwargs): self.clear() def __call__(self, *args, **kwargs): """Compute metric for current batch, append to saved_metrics, and return the batch result.""" metrics = self._compute(*args, **kwargs) for key, val in metrics.items(): self.saved_metrics[key].append(val.detach().cpu().float()) return metrics def _compute(self, **kwargs): """Subclasses implement this to compute metric dict from batch inputs.""" raise NotImplementedError() def clear(self): """Reset all accumulated metric values.""" self.saved_metrics = defaultdict(list) def aggregate(self): """Return a dict of concatenated/stacked tensors over all accumulated batches.""" output = {} for key, lst in self.saved_metrics.items(): try: output[key] = torch.cat(lst) except RuntimeError: output[key] = torch.stack(lst) return output def compute_metrics(metrics_list: List[Metric], metrics_in: Dict) -> Dict: """Run each metric on metrics_in and return the combined dict of batch results.""" metrics_out = {} for metric in metrics_list: metrics_out.update(metric(**metrics_in)) return metrics_out def aggregate_metrics(metrics_list: List[Metric]) -> Dict: """Return combined aggregated results (concatenated over batches) for all metrics.""" metrics_out = {} for metric in metrics_list: metrics_out.update(metric.aggregate()) return metrics_out def clear_metrics(metrics_list: List[Metric]) -> None: """Clear accumulated values for all metrics in the list.""" for metric in metrics_list: metric.clear() ================================================ FILE: kimodo/metrics/constraints.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Constraint-following metrics.""" from __future__ import annotations from collections import defaultdict from typing import Dict, List, Optional import torch from torch import Tensor from kimodo.constraints import ( EndEffectorConstraintSet, FullBodyConstraintSet, Root2DConstraintSet, ) from kimodo.tools import ensure_batched from .base import Metric class ContraintFollow(Metric): """Constraint-following metric dispatcher for kimodo constraint sets.""" def __init__( self, skeleton, root_threshold: float = 0.10, **kwargs, ): super().__init__(**kwargs) self.skeleton = skeleton self.root_threshold = root_threshold @ensure_batched(posed_joints=4, constraints_lst=2, lengths=1) def _compute( self, posed_joints: Tensor, constraints_lst: Optional[List], lengths: Optional[Tensor] = None, **kwargs, ) -> Dict: if not constraints_lst: return {} root_idx = self.skeleton.root_idx output = defaultdict(list) for posed_joints_s, constraint_lst_s, lengths_s in zip(posed_joints, constraints_lst, lengths): output_seq = defaultdict(list) for constraint in constraint_lst_s: frame_idx = constraint.frame_indices.to(device=posed_joints_s.device, dtype=torch.long) if frame_idx.numel() == 0: continue assert frame_idx.max() < lengths_s, "The constraint is defined outside the length of the motion." if isinstance(constraint, Root2DConstraintSet): pred_root2d = posed_joints_s[frame_idx, root_idx][:, [0, 2]] target = constraint.smooth_root_2d.to(posed_joints_s.device) dist = torch.norm(pred_root2d - target, dim=-1) output_seq["constraint_root2d_err"].append(dist) hit = (dist <= self.root_threshold).float() output_seq["constraint_root2d_acc"].append(hit) elif isinstance(constraint, FullBodyConstraintSet): pred = posed_joints_s[frame_idx] target = constraint.global_joints_positions.to(posed_joints_s.device) err = torch.norm(pred - target, dim=-1) output_seq["constraint_fullbody_keyframe"].append(err) elif isinstance(constraint, EndEffectorConstraintSet): pos_idx = constraint.pos_indices.to(device=posed_joints_s.device, dtype=torch.long) pred = posed_joints_s[frame_idx].index_select(1, pos_idx) target = constraint.global_joints_positions.to(posed_joints_s.device).index_select(1, pos_idx) err = torch.norm(pred - target, dim=-1) output_seq["constraint_end_effector"].append(err) # in case the list contains several constraints of the same type for key, val in output_seq.items(): output[key].append(torch.cat(val).mean()) reduced = {} for key, vals in output.items(): reduced[key] = torch.stack(vals, dim=0) return reduced ================================================ FILE: kimodo/metrics/foot_skate.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 """Foot skate and contact consistency metrics.""" from __future__ import annotations from typing import Dict, Optional import torch from torch import Tensor from kimodo.motion_rep.feature_utils import compute_vel_xyz from kimodo.motion_rep.feet import foot_detect_from_pos_and_vel from kimodo.skeleton import SkeletonBase from kimodo.tools import ensure_batched from .base import Metric def get_four_contacts(fidx: list): if len(fidx) == 4: return fidx if len(fidx) == 6: # For soma77 # remove "LeftToeEnd" and "RightToeEnd" fidx = fidx[:2] + fidx[3:5] return fidx raise ValueError("Expects 4 or 6 foot joints (heel/toe per foot)") class FootSkateFromHeight(Metric): """When toe joint is near the floor, measures mean velocity of the toes.""" def __init__( self, skeleton: SkeletonBase, fps: float, height_thresh: float = 0.05, **kwargs, ): super().__init__(**kwargs) self.height_thresh = height_thresh self.skeleton = skeleton self.fps = fps @ensure_batched(posed_joints=4, lengths=1) def _compute( self, posed_joints: Tensor, lengths: Optional[Tensor] = None, **kwargs, ) -> Dict: fidx = self.skeleton.foot_joint_idx fidx = get_four_contacts(fidx) feet_pos = posed_joints[:, :, fidx] toe_pos = feet_pos[:, :, [1, 3]] toe_on_floor = (toe_pos[..., 1] < self.height_thresh)[:, :-1] # y-up [B, T, 2] where [left right] dt = 1.0 / self.fps toe_vel = torch.norm(toe_pos[:, 1:] - toe_pos[:, :-1], dim=-1) / dt # [B, nframes-1, 2] # compute err contact_toe_vel = toe_vel * toe_on_floor # vel when corresponding toe is on ground # account for generated length # since they are velocities use length-1 to avoid inaccurate vel going one frame past len device = toe_on_floor.device len_mask = torch.arange(toe_on_floor.shape[1], device=device)[None, :, None].expand(toe_on_floor.shape) < ( lengths[:, None, None] - 1 ) toe_on_floor = toe_on_floor * len_mask contact_toe_vel = contact_toe_vel * len_mask mean_vel = torch.sum(contact_toe_vel, (1, 2)) / (torch.sum(toe_on_floor, (1, 2)) + 1e-6) return {"foot_skate_from_height": mean_vel} class FootSkateFromContacts(Metric): """Measures velocity of the toes and ankles when predicted to be in contact.""" def __init__( self, skeleton: SkeletonBase, fps: float, **kwargs, ): super().__init__(**kwargs) self.skeleton = skeleton self.fps = fps @ensure_batched(posed_joints=4, foot_contacts=3, lengths=1) def _compute( self, posed_joints: Tensor, foot_contacts: Tensor, lengths: Optional[Tensor] = None, **kwargs, ) -> Dict: fidx = self.skeleton.foot_joint_idx fidx = get_four_contacts(fidx) feet_pos = posed_joints[:, :, fidx] dt = 1.0 / self.fps foot_vel = torch.norm(feet_pos[:, 1:] - feet_pos[:, :-1], dim=-1) / dt if foot_contacts.shape[-1] == 6: # For soma77 # remove "LeftToeEnd" and "RightToeEnd" foot_contacts = foot_contacts[..., [0, 1, 3, 4]] foot_contacts = foot_contacts[:, :-1] vel_err = foot_vel * foot_contacts # account for generated length # since they are velocities use length-1 to avoid inaccurate vel going one frame past len device = foot_contacts.device len_mask = torch.arange(foot_contacts.shape[1], device=device)[None, :, None].expand(foot_contacts.shape) < ( lengths[:, None, None] - 1 ) foot_contacts = foot_contacts * len_mask vel_err = vel_err * len_mask mean_vel = torch.sum(vel_err, (1, 2)) / (torch.sum(foot_contacts, (1, 2)) + 1e-6) # mean over contacting frames # Compute max velocity error across all feet and frames (per batch) max_vel = vel_err.amax(dim=(1, 2)) # [B] return { "foot_skate_from_pred_contacts": mean_vel, "foot_skate_max_vel": 
max_vel, } class FootSkateRatio(Metric): """Compute fraction of frames where the foot skates when it is on the ground. Inspired by GMD: https://github.com/korrawe/guided-motion-diffusion/blob/main/data_loaders/humanml/utils/metrics.py#L204 """ def __init__( self, skeleton: SkeletonBase, fps: float, height_thresh=0.05, vel_thresh=0.2, **kwargs, ): super().__init__(**kwargs) self.height_thresh = height_thresh self.vel_thresh = vel_thresh self.skeleton = skeleton self.fps = fps @ensure_batched(posed_joints=4, foot_contacts=3, lengths=1) def _compute( self, posed_joints: Tensor, foot_contacts: Tensor, lengths: Optional[Tensor] = None, **kwargs, ) -> Dict: fidx = self.skeleton.foot_joint_idx fidx = get_four_contacts(fidx) feet_pos = posed_joints[:, :, fidx] toe_pos = feet_pos[:, :, [1, 3]] toe_on_floor = toe_pos[..., 1] < self.height_thresh # y-up [B, T, 2] where [left right] # current and next frame on floor to consider it in contact toe_on_floor = torch.logical_and(toe_on_floor[:, :-1], toe_on_floor[:, 1:]) # [B, T-1, 2] dt = 1.0 / self.fps toe_vel = torch.norm(toe_pos[:, 1:] - toe_pos[:, :-1], dim=-1) / dt # [B, nframes-1, 2] # compute err contact_toe_vel = toe_vel * toe_on_floor # vel when corresponding toe is on ground # account for generated length # since they are velocities use length-1 to avoid inaccurate vel going one frame past len device = toe_on_floor.device len_mask = torch.arange(toe_on_floor.shape[1], device=device)[None, :, None].expand(toe_on_floor.shape) < ( lengths[:, None, None] - 1 ) toe_on_floor = toe_on_floor * len_mask contact_toe_vel = contact_toe_vel * len_mask # skating if velocity during contact > thresh toe_skate = contact_toe_vel > self.vel_thresh skate_ratio = torch.sum(toe_skate, (1, 2)) / (torch.sum(toe_on_floor, (1, 2)) + 1e-6) return {"foot_skate_ratio": skate_ratio} class FootContactConsistency(Metric): """Measures consistency between heuristic detected foot contacts (from height and velocity) and predicted foot contacts. i.e. accuracy of how well predicted matches heuristic. 
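Accuracy per sequence is 1 - mean XOR(heuristic, predicted), computed over the contact channels and the first ``lengths - 1`` frames.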
""" def __init__( self, skeleton: SkeletonBase, fps: float, vel_thresh: float = 0.15, height_thresh: float = 0.10, **kwargs, ): super().__init__(**kwargs) self.vel_thresh = vel_thresh self.height_thresh = height_thresh self.skeleton = skeleton self.fps = fps @ensure_batched(posed_joints=4, foot_contacts=3, lengths=1) def _compute( self, posed_joints: Tensor, foot_contacts: Tensor, lengths: Optional[Tensor] = None, **kwargs, ) -> Dict: velocity = compute_vel_xyz(posed_joints, float(self.fps), lengths=lengths) heuristic_contacts = foot_detect_from_pos_and_vel( posed_joints, velocity, self.skeleton, self.vel_thresh, self.height_thresh, ) if foot_contacts.shape[-1] == 6: # For soma77 # remove "LeftToeEnd" and "RightToeEnd" foot_contacts = foot_contacts[..., [0, 1, 3, 4]] num_contacts = foot_contacts.shape[-1] incorrect = torch.logical_xor(heuristic_contacts, foot_contacts) # account for generated length # since they are velocities, use length-1 to avoid inaccurate vel going one frame past len device = foot_contacts.device len_mask = torch.arange(foot_contacts.shape[1], device=device)[None, :, None].expand(foot_contacts.shape) < ( lengths[:, None, None] - 1 ) incorrect = incorrect * len_mask incorrect_ratio = torch.sum(incorrect, (1, 2)) / (num_contacts * (lengths - 1)) accuracy = 1 - incorrect_ratio return {"foot_contact_consistency": accuracy} ================================================ FILE: kimodo/metrics/tmr.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """TMR evaluation metrics: text-motion retrieval, R-Precision, and related scores.""" from __future__ import annotations from collections import defaultdict from typing import Any, Dict, List, Optional import numpy as np import torch from scipy import linalg from torch import Tensor from kimodo.model.tmr import TMR from .base import Metric # Scores are between 0 and 1 def get_score_matrix_unit(x, y): sim_matrix = np.einsum("b i, c i -> b c", x, y) scores = sim_matrix / 2 + 0.5 return scores def get_scores_unit(x, y): similarity = np.einsum("... i, ... i", x, y) scores = similarity / 2 + 0.5 return scores def compute_tmr_per_sample_retrieval( motion_emb: np.ndarray, text_emb: np.ndarray, sample_ids: List[str], texts: List[str], top_k: int = 5, ) -> List[Dict[str, Any]]: """For each sample (text query i), compute t2m rank of motion i and top-k retrieved motions with ids and texts. Returns list of dicts: [{"rank": int, "top_k": [{"id": str, "text": str}, ...]}, ...]. 
""" motion_emb = np.asarray(motion_emb).squeeze() text_emb = np.asarray(text_emb).squeeze() if motion_emb.ndim == 1: motion_emb = motion_emb[np.newaxis, :] if text_emb.ndim == 1: text_emb = text_emb[np.newaxis, :] n = motion_emb.shape[0] assert text_emb.shape[0] == n and len(sample_ids) == n and len(texts) == n scores = get_score_matrix_unit(text_emb, motion_emb) out: List[Dict[str, Any]] = [] for i in range(n): row = np.asarray(scores[i]) order = np.argsort(row)[::-1] rank = int(np.where(order == i)[0][0]) + 1 top_indices = order[:top_k] top_k_list = [{"id": sample_ids[j], "text": texts[j]} for j in top_indices] out.append({"rank": rank, "top_k": top_k_list}) return out class TMR_Metric(Metric): def __init__( self, tmr_model: TMR, ranks: List = [1, 2, 3, 5, 10], ranks_rounding=2, **kwargs, ): super().__init__(**kwargs) self.tmr_model = tmr_model self.ranks = ranks self.ranks_rounding = ranks_rounding def clear(self): self.saved_metrics = defaultdict(list) self.saved_text_latents = [] self.saved_motion_gen_latents = [] self.saved_motion_gt_latents = [] def _compute( self, motion_rep, pred_joints_output: Dict, gt_joints_output: Dict, text_x_dict: Dict, lengths: Tensor, **kwargs, ) -> Dict: pred_posed_joints = pred_joints_output["posed_joints"] original_skeleton = motion_rep.skeleton if motion_rep is not None else None latents_motion = self.tmr_model.encode_motion( pred_posed_joints, lengths=lengths, original_skeleton=original_skeleton, unit_vector=True, ) latents_motion = latents_motion.cpu().numpy() if isinstance(text_x_dict, dict) and "texts" in text_x_dict: latents_text = self.tmr_model.encode_raw_text(text_x_dict["texts"], unit_vector=True) else: latents_text = self.tmr_model.encode_text(text_x_dict, unit_vector=True) if latents_text.dim() == 1: latents_text = latents_text.unsqueeze(0) latents_text = latents_text.cpu().numpy() self.saved_text_latents.append(latents_text) self.saved_motion_gen_latents.append(latents_motion) scores_text = get_scores_unit(latents_motion, latents_text) output = {"TMR/t2m_sim": scores_text} if gt_joints_output is not None and "posed_joints" in gt_joints_output: gt_posed_joints = gt_joints_output["posed_joints"] gt_latents_motion = self.tmr_model.encode_motion( gt_posed_joints, lengths=lengths, original_skeleton=original_skeleton, unit_vector=True, ) gt_latents_motion = gt_latents_motion.cpu().numpy() self.saved_motion_gt_latents.append(gt_latents_motion) gt_scores_text = get_scores_unit(gt_latents_motion, latents_text) scores_motion = get_scores_unit(latents_motion, gt_latents_motion) output["TMR/t2m_gt_sim"] = gt_scores_text output["TMR/m2m_sim"] = scores_motion # pytorch tensors for key, val in output.items(): output[key] = torch.tensor(val) return output def aggregate(self): output = {} for key, lst in self.saved_metrics.items(): output[key] = np.concatenate(lst) assert self.saved_text_latents, "Should call the metric at least once." 
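# Concatenate all per-batch latents into (N, D) matrices; the retrieval and FID metrics below are computed over the full evaluation set rather than per batch.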
text_latents = np.concatenate(self.saved_text_latents) motion_gen_latents = np.concatenate(self.saved_motion_gen_latents) batch_size = len(text_latents) assert text_latents.shape == motion_gen_latents.shape scores_t2m = get_score_matrix_unit(text_latents, motion_gen_latents) scores_t2t = get_score_matrix_unit(text_latents, text_latents) t2m_metrics = contrastive_metrics( scores=scores_t2m, scores_t2t=scores_t2t, threshold=0.99, rounding=2, ) for key, val in t2m_metrics.items(): output["TMR/t2m_R/" + key] = val mu_gen, cov_gen = calculate_activation_statistics(motion_gen_latents) mu_text, cov_text = calculate_activation_statistics(text_latents) fid_gen_text = calculate_frechet_distance(mu_gen, cov_gen, mu_text, cov_text) output["TMR/FID/gen_text"] = fid_gen_text if self.saved_motion_gt_latents: motion_gt_latents = np.concatenate(self.saved_motion_gt_latents) assert motion_gt_latents.shape == motion_gen_latents.shape scores_m2gm = get_score_matrix_unit(motion_gen_latents, motion_gt_latents) scores_t2gm = get_score_matrix_unit(text_latents, motion_gt_latents) m2gm_metrics = contrastive_metrics( scores=scores_m2gm, scores_t2t=scores_t2t, threshold=0.99, rounding=2, ) for key, val in m2gm_metrics.items(): output["TMR/m2m_R/" + key] = val t2gm_metrics = contrastive_metrics( scores=scores_t2gm, scores_t2t=scores_t2t, threshold=0.99, rounding=2, ) for key, val in t2gm_metrics.items(): output["TMR/t2m_gt_R/" + key] = val mu_gt_motion, cov_gt_motion = calculate_activation_statistics(motion_gt_latents) fid_gen_motion = calculate_frechet_distance( mu_gen, cov_gen, mu_gt_motion, cov_gt_motion, ) output["TMR/FID/gen_gt"] = fid_gen_motion fid_gt_text = calculate_frechet_distance( mu_gt_motion, cov_gt_motion, mu_text, cov_text, ) output["TMR/FID/gt_text"] = fid_gt_text for key, val in output.items(): if isinstance(val, (int, float, np.integer, np.floating)): val = torch.tensor([val for _ in range(batch_size)]) if isinstance(val, np.ndarray): val = torch.from_numpy(val) output[key] = val.cpu().float() return output class TMR_EmbeddingMetric(Metric): """TMR metrics from precomputed motion and text embeddings (no model load). Use in the loop: pass motion_emb and text_emb per sample; aggregate() computes retrieval metrics. 
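Example (illustrative; ``samples`` stands for any iterable of dicts holding precomputed embeddings): ``metric = TMR_EmbeddingMetric()``; ``for s in samples: metric(motion_emb=s["motion_emb"], text_emb=s["text_emb"])``; ``results = metric.aggregate()`` then contains R-precision, MedR, and FID over all accumulated samples.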
""" def __init__(self, ranks_rounding: int = 2, **kwargs): super().__init__(**kwargs) self.ranks_rounding = ranks_rounding def clear(self): self.saved_metrics = defaultdict(list) self.saved_text_latents = [] self.saved_motion_gen_latents = [] self.saved_motion_gt_latents = [] def _compute( self, motion_emb=None, text_emb=None, gt_motion_emb=None, **kwargs, ) -> Dict: if motion_emb is None or text_emb is None: return {} motion_emb = np.asarray(motion_emb) text_emb = np.asarray(text_emb) if motion_emb.ndim == 1: motion_emb = motion_emb[np.newaxis, :] if text_emb.ndim == 1: text_emb = text_emb[np.newaxis, :] self.saved_text_latents.append(text_emb) self.saved_motion_gen_latents.append(motion_emb) if gt_motion_emb is not None: gt_motion_emb = np.asarray(gt_motion_emb) if gt_motion_emb.ndim == 1: gt_motion_emb = gt_motion_emb[np.newaxis, :] self.saved_motion_gt_latents.append(gt_motion_emb) scores = get_scores_unit(motion_emb, text_emb) return {"TMR/t2m_sim": torch.tensor(scores, dtype=torch.float32)} def aggregate(self): output = {} for key, lst in self.saved_metrics.items(): output[key] = np.concatenate(lst) if not self.saved_text_latents: return output text_latents = np.concatenate(self.saved_text_latents) motion_gen_latents = np.concatenate(self.saved_motion_gen_latents) batch_size = len(text_latents) assert text_latents.shape == motion_gen_latents.shape scores_t2m = get_score_matrix_unit(text_latents, motion_gen_latents) scores_t2t = get_score_matrix_unit(text_latents, text_latents) t2m_metrics = contrastive_metrics( scores=scores_t2m, scores_t2t=scores_t2t, threshold=0.99, rounding=self.ranks_rounding, ) for key, val in t2m_metrics.items(): output["TMR/t2m_R/" + key] = val if batch_size >= 2: mu_gen, cov_gen = calculate_activation_statistics(motion_gen_latents) mu_text, cov_text = calculate_activation_statistics(text_latents) output["TMR/FID/gen_text"] = calculate_frechet_distance(mu_gen, cov_gen, mu_text, cov_text) else: output["TMR/FID/gen_text"] = float("nan") if self.saved_motion_gt_latents: motion_gt_latents = np.concatenate(self.saved_motion_gt_latents) assert motion_gt_latents.shape == motion_gen_latents.shape scores_m2gm = get_score_matrix_unit(motion_gen_latents, motion_gt_latents) scores_t2gm = get_score_matrix_unit(text_latents, motion_gt_latents) m2gm_metrics = contrastive_metrics( scores=scores_m2gm, scores_t2t=scores_t2t, threshold=0.99, rounding=self.ranks_rounding, ) for key, val in m2gm_metrics.items(): output["TMR/m2m_R/" + key] = val t2gm_metrics = contrastive_metrics( scores=scores_t2gm, scores_t2t=scores_t2t, threshold=0.99, rounding=self.ranks_rounding, ) for key, val in t2gm_metrics.items(): output["TMR/t2m_gt_R/" + key] = val if batch_size >= 2: mu_gt_motion, cov_gt_motion = calculate_activation_statistics(motion_gt_latents) output["TMR/FID/gen_gt"] = calculate_frechet_distance(mu_gen, cov_gen, mu_gt_motion, cov_gt_motion) output["TMR/FID/gt_text"] = calculate_frechet_distance(mu_gt_motion, cov_gt_motion, mu_text, cov_text) else: output["TMR/FID/gen_gt"] = float("nan") output["TMR/FID/gt_text"] = float("nan") for key, val in output.items(): if isinstance(val, (int, float, np.integer, np.floating)): val = torch.tensor([val for _ in range(batch_size)]) if isinstance(val, np.ndarray): val = torch.from_numpy(val) output[key] = val.cpu().float() return output def compute_tmr_retrieval_metrics( motion_emb: np.ndarray, text_emb: np.ndarray, gt_motion_emb: Optional[np.ndarray] = None, rounding: int = 2, ) -> Dict[str, float]: """Compute TMR retrieval metrics from 
precomputed embeddings."""
    if motion_emb.shape != text_emb.shape:
        raise ValueError(f"Expected same shape for motion/text embeddings, got {motion_emb.shape} vs {text_emb.shape}")

    scores_t2m = get_score_matrix_unit(text_emb, motion_emb)
    scores_t2t = get_score_matrix_unit(text_emb, text_emb)

    output: Dict[str, float] = {}
    t2m_metrics = contrastive_metrics(
        scores=scores_t2m,
        scores_t2t=scores_t2t,
        threshold=0.99,
        rounding=rounding,
    )
    for key, val in t2m_metrics.items():
        output[f"TMR/t2m_R/{key}"] = float(val)

    n_samples = len(motion_emb)
    if n_samples >= 2:
        mu_gen, cov_gen = calculate_activation_statistics(motion_emb)
        mu_text, cov_text = calculate_activation_statistics(text_emb)
        output["TMR/FID/gen_text"] = float(calculate_frechet_distance(mu_gen, cov_gen, mu_text, cov_text))
    else:
        output["TMR/FID/gen_text"] = float("nan")

    if gt_motion_emb is not None:
        if gt_motion_emb.shape != motion_emb.shape:
            raise ValueError(f"Expected gt motion embeddings shape {motion_emb.shape}, got {gt_motion_emb.shape}")

        scores_m2gm = get_score_matrix_unit(motion_emb, gt_motion_emb)
        scores_t2gm = get_score_matrix_unit(text_emb, gt_motion_emb)

        m2gm_metrics = contrastive_metrics(
            scores=scores_m2gm,
            scores_t2t=scores_t2t,
            threshold=0.99,
            rounding=rounding,
        )
        for key, val in m2gm_metrics.items():
            output[f"TMR/m2m_R/{key}"] = float(val)

        t2gm_metrics = contrastive_metrics(
            scores=scores_t2gm,
            scores_t2t=scores_t2t,
            threshold=0.99,
            rounding=rounding,
        )
        for key, val in t2gm_metrics.items():
            output[f"TMR/t2m_gt_R/{key}"] = float(val)

        if n_samples >= 2:
            mu_gt_motion, cov_gt_motion = calculate_activation_statistics(gt_motion_emb)
            output["TMR/FID/gen_gt"] = float(calculate_frechet_distance(mu_gen, cov_gen, mu_gt_motion, cov_gt_motion))
            output["TMR/FID/gt_text"] = float(calculate_frechet_distance(mu_gt_motion, cov_gt_motion, mu_text, cov_text))
        else:
            output["TMR/FID/gen_gt"] = float("nan")
            output["TMR/FID/gt_text"] = float("nan")

    return output


def all_contrastive_metrics(sims, emb=None, threshold=None, rounding=2, return_cols=False):
    text_selfsim = None
    if emb is not None:
        text_selfsim = emb @ emb.T

    t2m_m, t2m_cols = contrastive_metrics(sims, text_selfsim, threshold, return_cols=True, rounding=rounding)
    m2t_m, m2t_cols = contrastive_metrics(sims.T, text_selfsim, threshold, return_cols=True, rounding=rounding)

    all_m = {}
    for key in t2m_m:
        all_m[f"t2m/{key}"] = t2m_m[key]
        all_m[f"m2t/{key}"] = m2t_m[key]

    all_m["t2m/len"] = float(len(sims))
    all_m["m2t/len"] = float(len(sims[0]))
    if return_cols:
        return all_m, t2m_cols, m2t_cols
    return all_m


def contrastive_metrics(
    scores,
    scores_t2t=None,
    threshold=None,
    rounding=2,
    return_cols=False,
):
    n, m = scores.shape
    assert n == m
    num_queries = n

    dists = -scores
    sorted_dists = np.sort(dists, axis=1)

    # GT is in the diagonal
    gt_dists = np.diag(dists)[:, None]

    if scores_t2t is not None and threshold is not None:
        real_threshold = 2 * threshold - 1
        idx = np.argwhere(scores_t2t > real_threshold)
        partition = np.unique(idx[:, 0], return_index=True)[1]
        # take as GT the minimum score of similar values
        gt_dists = np.minimum.reduceat(dists[tuple(idx.T)], partition)
        gt_dists = gt_dists[:, None]

    rows, cols = np.where((sorted_dists - gt_dists) == 0)  # find column position of GT

    # if there are ties
    if rows.size > num_queries:
        assert np.unique(rows).size == num_queries, "issue in metric evaluation"
        avg_cols = break_ties_average(sorted_dists, gt_dists)
        cols = avg_cols

    msg = "expected ranks to match queries ({} vs {})"
    assert cols.size == num_queries, msg.format(cols.size, num_queries)

    metrics = {}
    vals = [str(x).zfill(2) for x in [1, 2, 3, 5, 10]]
    for val in vals:
        metrics[f"R{val}"] = 100 * float(np.sum(cols < int(val))) / num_queries
    metrics["MedR"] = float(np.median(cols) + 1)
    metrics["len"] = num_queries

    if rounding is not None:
        for key in metrics:
            metrics[key] = round(metrics[key], rounding)

    if return_cols:
        return metrics, cols
    return metrics


def break_ties_average(sorted_dists, gt_dists):
    # fast implementation, based on this code:
    # https://stackoverflow.com/a/49239335
    locs = np.argwhere((sorted_dists - gt_dists) == 0)

    # Find the split indices
    steps = np.diff(locs[:, 0])
    splits = np.nonzero(steps)[0] + 1
    splits = np.insert(splits, 0, 0)

    # Compute the result columns
    summed_cols = np.add.reduceat(locs[:, 1], splits)
    counts = np.diff(np.append(splits, locs.shape[0]))
    avg_cols = summed_cols / counts
    return avg_cols


def calculate_activation_statistics(activations):
    """
    Params:
    -- activations: num_samples x dim_feat
    Returns:
    -- mu: dim_feat
    -- sigma: dim_feat x dim_feat
    """
    mu = np.mean(activations, axis=0)
    cov = np.cov(activations, rowvar=False)
    return mu, cov


def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance.

    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
    d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
    Stable version by Dougal J. Sutherland.

    Params:
    -- mu1   : Numpy array containing the activations of a layer of the
               inception net (like returned by the function 'get_predictions')
               for generated samples.
    -- mu2   : The sample mean over activations, precalculated on a
               representative dataset.
    -- sigma1: The covariance matrix over activations for generated samples.
    -- sigma2: The covariance matrix over activations, precalculated on a
               representative dataset.
    Returns:
    --       : The Frechet Distance.
    """
    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)

    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, "Training and test mean vectors have different lengths"
    assert sigma1.shape == sigma2.shape, "Training and test covariances have different dimensions"

    diff = mu1 - mu2

    # Product might be almost singular
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = ("fid calculation produces singular product; " "adding %s to diagonal of cov estimates") % eps
        print(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    # Numerical error might give slight imaginary component
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            # try again with a small offset added to the covariance diagonals
            offset = np.eye(sigma1.shape[0]) * eps
            covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
            if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
                m = np.max(np.abs(covmean.imag))
                raise ValueError("Imaginary component {}".format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)

    return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean
================================================
FILE: kimodo/model/__init__.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Kimodo model package: main model class, text encoders, and loading utilities."""

from .common import resolve_target
from .kimodo_model import Kimodo
from .llm2vec import LLM2VecEncoder
from .load_model import load_model
from .loading import (
    AVAILABLE_MODELS,
    DEFAULT_MODEL,
    DEFAULT_TEXT_ENCODER_URL,
    MODEL_NAMES,
    load_checkpoint_state_dict,
)
from .tmr import TMR
from .twostage_denoiser import TwostageDenoiser

__all__ = [
    "Kimodo",
    "LLM2VecEncoder",
    "TMR",
    "TwostageDenoiser",
    "load_model",
    "load_checkpoint_state_dict",
    "resolve_target",
    "AVAILABLE_MODELS",
    "DEFAULT_MODEL",
    "DEFAULT_TEXT_ENCODER_URL",
    "MODEL_NAMES",
]


================================================
FILE: kimodo/model/backbone.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Transformer backbone: padding, masking, and encoder stack for the denoiser."""

import logging
from typing import Optional, Union

import torch
from omegaconf import ListConfig
from pydantic.dataclasses import dataclass
from torch import Tensor, nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer

from kimodo.tools import validate

log = logging.getLogger(__name__)


def pad_x_and_mask_to_fixed_size(x: Tensor, mask: Tensor, size: int):
    """Pad a feature vector x and the mask to always have the same size.

    Args:
        x (torch.Tensor): [B, T, D]
        mask (torch.Tensor): [B, T]
        size (int)

    Returns:
        torch.Tensor: [B, size, D]
        torch.Tensor: [B, size]
    """
    batch_size, cur_max_size, dim = x.shape[0], x.shape[1], x.shape[2]
    if cur_max_size == size:
        # already padded to this size, probably in the collate function
        return x, mask
    if cur_max_size > size:
        # This issue should have been handled in the collate function
        # useful as a check for test time
        log.warning("The size of the tensor is larger than the maximum size. Cropping the input.")
        cur_max_size = size
        x = x[:, :cur_max_size]
        mask = mask[:, :cur_max_size]

    new_x = torch.zeros(
        (batch_size, size, dim),
        dtype=x.dtype,
        device=x.device,
    )
    new_x[:, :cur_max_size] = x

    # same for the mask
    new_mask = torch.zeros(
        (batch_size, size),
        dtype=mask.dtype,
        device=mask.device,
    )
    new_mask[:, :cur_max_size] = mask
    return new_x, new_mask


@dataclass(frozen=True, config=dict(extra="forbid", arbitrary_types_allowed=True))
class TransformerEncoderBlockConfig:
    """Configuration for the transformer encoder backbone."""

    # input features dimension
    input_dim: int
    # output features dimension
    output_dim: int
    # skeleton object
    skeleton: object
    # dimension of the text embeddings
    llm_shape: Union[list[int], ListConfig]
    # mask the text or not
    use_text_mask: bool
    # latent dimension of the model
    latent_dim: int
    # dimension of the feedforward network in transformer
    ff_size: int
    # num layers in transformer
    num_layers: int
    # num heads in transformer
    num_heads: int
    # activation in transformer
    activation: str
    # dropout rate for the transformer
    dropout: float
    # dropout rate for the positional embeddings
    pe_dropout: float
    # use norm first or not
    norm_first: bool = False
    # artificially extend the number of text tokens
    num_text_tokens_override: Optional[int] = None
    # Input first heading angle
    input_first_heading_angle: bool = False


class TransformerEncoderBlock(nn.Module):
    @validate(TransformerEncoderBlockConfig, save_args=True, super_init=True)
    def __init__(self, conf):
        self.nbjoints = self.skeleton.nbjoints

        llm_dim = self.llm_shape[-1]
        self.embed_text = nn.Linear(llm_dim, self.latent_dim)
        self.sequence_pos_encoder = PositionalEncoding(self.latent_dim, self.pe_dropout)

        # maximum number of tokens
        self.num_text_tokens = self.llm_shape[0]
        if self.num_text_tokens_override is not None:
            self.num_text_tokens = self.num_text_tokens_override

        self.embed_timestep = TimestepEmbedder(self.latent_dim, self.sequence_pos_encoder)
        self.input_linear = nn.Linear(self.input_dim, self.latent_dim)
        self.output_linear = nn.Linear(self.latent_dim, self.output_dim)
        self.linear_first_heading_angle = nn.Linear(2, self.latent_dim)

        trans_enc_layer = TransformerEncoderLayer(
            d_model=self.latent_dim,
            nhead=self.num_heads,
            dim_feedforward=self.ff_size,
            dropout=self.dropout,
            activation=self.activation,
            batch_first=True,
            norm_first=self.norm_first,
        )
        self.seqTransEncoder = TransformerEncoder(
            trans_enc_layer,
            num_layers=self.num_layers,
            enable_nested_tensor=False,
        )

    def forward(
        self,
        x: Tensor,
        x_pad_mask: torch.Tensor,
        text_feat: torch.Tensor,
        text_feat_pad_mask: torch.Tensor,
        timesteps: Tensor,
        first_heading_angle: Optional[Tensor] = None,
    ) -> Tensor:
        """
        Args:
            x (torch.Tensor): [B, T, dim_motion] current noisy motion
            x_pad_mask (torch.Tensor): [B, T] attention mask, positions with True are allowed to attend, False are not
            text_feat (torch.Tensor): [B, max_text_len, llm_dim] embedded text prompts
            text_feat_pad_mask (torch.Tensor): [B, max_text_len] attention mask, positions with True are allowed to attend, False are not
            timesteps (torch.Tensor): [B,] current denoising step
            first_heading_angle (Optional[torch.Tensor]): [B,] initial heading angle in radians; required when input_first_heading_angle is set

        Returns:
            torch.Tensor: [B, T, output_dim]
        """
        batch_size = len(x)

        x = self.input_linear(x)  # [B, T, D]

        # Pad the text tokens + mask to always have the same size == self.num_text_tokens
        # done here if it was not done in the collate function
        if self.num_text_tokens is not None:
            text_feat, text_feat_pad_mask = pad_x_and_mask_to_fixed_size(
                text_feat,
                text_feat_pad_mask,
                self.num_text_tokens,
            )

        # Encode the text features and the time information
        emb_text = self.embed_text(text_feat)  # [B, max_text_len, D]
        emb_time = self.embed_timestep(timesteps)  # [B, 1, D]

        # Create mask for the time information
        time_mask = torch.ones((batch_size, 1), dtype=torch.bool, device=x.device)

        # Create the prefix features (text, time, etc): [B, max_text_len + 1 + etc]
        prefix_feats = torch.cat((emb_text, emb_time), axis=1)

        # Behavior from old code: not use text mask -> True for all the tokens
        if not self.use_text_mask:
            text_feat_pad_mask = torch.ones(
                (batch_size, emb_text.shape[1]),
                dtype=torch.bool,
                device=x.device,
            )

        prefix_mask = torch.cat((text_feat_pad_mask, time_mask), axis=1)

        # add the input first heading angle
        if self.input_first_heading_angle:
            assert first_heading_angle is not None, "The first heading angle is mandatory for this model"
            # cos(angle) / sin(angle)
            first_heading_angle_feats = torch.stack(
                [
                    torch.cos(first_heading_angle),
                    torch.sin(first_heading_angle),
                ],
                axis=-1,
            )
            first_heading_angle_feats = self.linear_first_heading_angle(first_heading_angle_feats)
            first_heading_angle_feats = first_heading_angle_feats[:, None]  # for cat
            first_heading_angle_mask = torch.ones(
                (batch_size, 1),
                dtype=torch.bool,
                device=x.device,
            )
            prefix_feats = torch.cat((prefix_feats, first_heading_angle_feats), axis=1)
            prefix_mask = torch.cat((prefix_mask, first_heading_angle_mask), axis=1)

        # compute the number of prefix features
        pose_start_ind = prefix_feats.shape[1]

        # Concatenate prefix and x: [B, len(prefix) + T, D]
        xseq = torch.cat((prefix_feats, x), axis=1)

        # Concatenate the masks and negate them: [B, len(prefix) + T]
        src_key_padding_mask = ~torch.cat((prefix_mask, x_pad_mask), axis=1)

        # Add positional encoding
        xseq = self.sequence_pos_encoder(xseq)

        # Input to the transformer and keep the motion indexes
        if isinstance(self.seqTransEncoder, nn.TransformerEncoder):
            assert not self.seqTransEncoder.use_nested_tensor, "Flash attention should be disabled due to bug!"
        output = self.seqTransEncoder(
            xseq,
            src_key_padding_mask=src_key_padding_mask,
        )
        output = output[:, pose_start_ind:]  # [B, T, D]
        output = self.output_linear(output)  # [B, T, OD]
        return output


class PositionalEncoding(nn.Module):
    """Non-learned positional encoding."""

    def __init__(
        self,
        d_model: int,
        dropout: Optional[float] = 0.1,
        max_len: Optional[int] = 5000,
    ):
        """
        Args:
            d_model (int): input dim
            dropout (Optional[float] = 0.1): dropout probability on output
            max_len (Optional[int] = 5000): maximum sequence length
        """
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        # Note: we have to replace torch.exp() and math.log() with torch.pow()
        # because MKL exp() and ln() throw floating point exceptions on certain CPUs
        # see corresponding commit and MR
        div_term = torch.pow(10000.0, -torch.arange(0, d_model, 2).float() / d_model)
        # div_term = torch.exp(
        #     torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model)
        # )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)  # [1, T, D]

        self.register_buffer("pe", pe, persistent=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply positional encoding to input sequence.

        Args:
            x (torch.Tensor): [B, T, D] input motion sequence

        Returns:
            torch.Tensor: [B, T, D] input motion with PE added to it (and optionally dropout)
        """
        x = x + self.pe[:, : x.shape[1], :]
        return self.dropout(x)


class TimestepEmbedder(nn.Module):
    """Encoder for diffusion step."""

    def __init__(self, latent_dim: int, sequence_pos_encoder: PositionalEncoding):
        """
        Args:
            latent_dim (int): dim to encode to
            sequence_pos_encoder (PositionalEncoding): the PE to use on timesteps
        """
        super().__init__()
        self.latent_dim = latent_dim
        self.sequence_pos_encoder = sequence_pos_encoder

        time_embed_dim = self.latent_dim
        self.time_embed = nn.Sequential(
            nn.Linear(self.latent_dim, time_embed_dim),
            nn.SiLU(),
            nn.Linear(time_embed_dim, time_embed_dim),
        )

    def forward(self, timesteps: torch.Tensor) -> torch.Tensor:
        """Embed timesteps by looking up their positional encoding and passing it through an MLP.

        Args:
            timesteps (torch.Tensor): [B]

        Returns:
            torch.Tensor: [B, 1, D]
        """
        return self.time_embed(self.sequence_pos_encoder.pe.transpose(0, 1)[timesteps])


================================================
FILE: kimodo/model/cfg.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Classifier-free guidance wrapper for the denoiser at sampling time."""

from typing import Dict, Optional, Tuple, Union

import torch
import torch.nn as nn

CFG_TYPES = ["nocfg", "regular", "separated"]


class ClassifierFreeGuidedModel(nn.Module):
    """Wrapper around denoiser to use classifier-free guidance at sampling time."""

    def __init__(self, model: nn.Module, cfg_type: Optional[str] = "separated"):
        """Wrap the denoiser for classifier-free guidance; cfg_type in CFG_TYPES (e.g. 'regular', 'nocfg')."""
        super().__init__()
        self.model = model
        assert cfg_type in CFG_TYPES, f"Invalid cfg_type: {cfg_type}"
        self.cfg_type_default = cfg_type

    def forward(
        self,
        cfg_weight: Union[float, Tuple[float, float]],
        x: torch.Tensor,
        x_pad_mask: torch.Tensor,
        text_feat: torch.Tensor,
        text_feat_pad_mask: torch.Tensor,
        timesteps: torch.Tensor,
        first_heading_angle: Optional[torch.Tensor] = None,
        motion_mask: Optional[torch.Tensor] = None,
        observed_motion: Optional[torch.Tensor] = None,
        cfg_type: Optional[str] = None,
    ) -> torch.Tensor:
        """
        Args:
            cfg_weight (Union[float, Tuple[float, float]]): guidance weight, a single float, or a tuple of (text, constraint) weights when using separated cfg
            x (torch.Tensor): [B, T, dim_motion] current noisy motion
            x_pad_mask (torch.Tensor): [B, T] attention mask, positions with True are allowed to attend, False are not
            text_feat (torch.Tensor): [B, max_text_len, llm_dim] embedded text prompts
            text_feat_pad_mask (torch.Tensor): [B, max_text_len] attention mask, positions with True are allowed to attend, False are not
            timesteps (torch.Tensor): [B,] current denoising step
            first_heading_angle (Optional[torch.Tensor]): [B,] initial heading angle in radians
            motion_mask (Optional[torch.Tensor]): [B, T, dim_motion] binary mask marking which motion features are constrained
            observed_motion (Optional[torch.Tensor]): [B, T, dim_motion] observed (constraint) motion features used as conditioning

        Returns:
            torch.Tensor: same size as input x
        """
        if cfg_type is None:
            cfg_type = self.cfg_type_default
        assert cfg_type in CFG_TYPES, f"Invalid cfg_type: {cfg_type}"

        # batched conditional and uncond pass together
        if cfg_type == "nocfg":
            return self.model(
                x,
                x_pad_mask,
                text_feat,
                text_feat_pad_mask,
                timesteps,
                first_heading_angle=first_heading_angle,
                motion_mask=motion_mask,
                observed_motion=observed_motion,
            )
        elif cfg_type == "regular":
            assert isinstance(cfg_weight, (float, int)), "cfg_weight must be a single float for regular CFG"
            # out_uncond + w * (out_text_and_constraint -
out_uncond) text_feat = torch.concatenate([text_feat, 0 * text_feat], dim=0) if motion_mask is not None: motion_mask = torch.concatenate([motion_mask, 0 * motion_mask], dim=0) if observed_motion is not None: observed_motion = torch.concatenate([observed_motion, observed_motion], dim=0) if first_heading_angle is not None: first_heading_angle = torch.concatenate([first_heading_angle, first_heading_angle], dim=0) out_cond_uncond = self.model( torch.concatenate([x, x], dim=0), torch.concatenate([x_pad_mask, x_pad_mask], dim=0), text_feat, torch.concatenate([text_feat_pad_mask, False * text_feat_pad_mask], dim=0), torch.concatenate([timesteps, timesteps], dim=0), first_heading_angle=first_heading_angle, motion_mask=motion_mask, observed_motion=observed_motion, ) out, out_uncond = torch.chunk(out_cond_uncond, 2) out_new = out_uncond + (cfg_weight * (out - out_uncond)) elif cfg_type == "separated": assert len(cfg_weight) == 2, "cfg_weight must be a tuple of two floats for separated CFG" # out_uncond + w_text * (out_text - out_uncond) + w_constraint * (out_constraint - out_uncond) text_feat = torch.concatenate([text_feat, 0 * text_feat, 0 * text_feat], dim=0) if motion_mask is not None: motion_mask = torch.concatenate([0 * motion_mask, motion_mask, 0 * motion_mask], dim=0) if observed_motion is not None: observed_motion = torch.concatenate([observed_motion, observed_motion, observed_motion], dim=0) if first_heading_angle is not None: first_heading_angle = torch.concatenate( [first_heading_angle, first_heading_angle, first_heading_angle], dim=0, ) out_cond_uncond = self.model( torch.concatenate([x, x, x], dim=0), torch.concatenate([x_pad_mask, x_pad_mask, x_pad_mask], dim=0), text_feat, torch.concatenate( [ text_feat_pad_mask, False * text_feat_pad_mask, False * text_feat_pad_mask, ], dim=0, ), torch.concatenate([timesteps, timesteps, timesteps], dim=0), first_heading_angle=first_heading_angle, motion_mask=motion_mask, observed_motion=observed_motion, ) out_text, out_constraint, out_uncond = torch.chunk(out_cond_uncond, 3) out_new = ( out_uncond + (cfg_weight[0] * (out_text - out_uncond)) + (cfg_weight[1] * (out_constraint - out_uncond)) ) else: raise ValueError(f"Invalid cfg_type: {cfg_type}") return out_new ================================================ FILE: kimodo/model/common.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Config hydration: env vars, _target_ resolution, and recursive instantiation.""" import importlib import os def get_env_var(name: str, default=None): """Read env var by name and by lowercased name; return default if neither set.""" return os.getenv(name, os.getenv(name.lower(), default)) def resolve_target(target: str): """Import module and return the attribute named by a dotted path (e.g. 
'pkg.mod.Class').""" module_name, attr_name = target.rsplit(".", 1) module = importlib.import_module(module_name) return getattr(module, attr_name) def materialize_value(value): """Recursively turn dicts with '_target_' into instances; lists/dicts traversed; leaves unchanged.""" if isinstance(value, dict): if "_target_" in value: return instantiate_from_dict(value) return {k: materialize_value(v) for k, v in value.items()} if isinstance(value, list): return [materialize_value(v) for v in value] return value def instantiate_from_dict(node, overrides=None): """Build an instance from a config dict: '_target_' gives the class, other keys are kwargs; overrides merged in.""" if not isinstance(node, dict) or "_target_" not in node: raise ValueError("Config node must be a dict with a '_target_' key.") target = resolve_target(node["_target_"]) kwargs = {} for key, value in node.items(): if key == "_target_": continue kwargs[key] = materialize_value(value) if overrides: kwargs.update({k: v for k, v in overrides.items() if v is not None}) return target(**kwargs) ================================================ FILE: kimodo/model/diffusion.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Diffusion process and DDIM sampling for motion generation.""" import math from typing import Optional, Tuple import torch from torch import nn def get_beta_schedule( num_diffusion_timesteps: int, max_beta: Optional[float] = 0.999, ) -> torch.Tensor: """Get cosine beta schedule.""" def alpha_bar(t): return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 betas = [] for i in range(num_diffusion_timesteps): t1 = i / num_diffusion_timesteps t2 = (i + 1) / num_diffusion_timesteps betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) return torch.tensor(betas, dtype=torch.float) class Diffusion(torch.nn.Module): """Cosine-schedule diffusion process: betas, alphas, and DDIM step mapping.""" def __init__(self, num_base_steps: int): """Set up cosine beta schedule and precompute diffusion variables for num_base_steps.""" super().__init__() self.num_base_steps = num_base_steps betas_base = get_beta_schedule(self.num_base_steps) self.register_buffer("betas_base", betas_base, persistent=False) alphas_cumprod_base = torch.cumprod(1.0 - self.betas_base, dim=0) self.register_buffer("alphas_cumprod_base", alphas_cumprod_base, persistent=False) use_timesteps, _ = self.space_timesteps(self.num_base_steps) self.calc_diffusion_vars(use_timesteps) def extra_repr(self) -> str: return f"num_base_steps={self.num_base_steps}" @property def device(self): return self.betas_base.device def space_timesteps(self, num_denoising_steps: int) -> Tuple[torch.Tensor, torch.Tensor]: """Return (use_timesteps, map_tensor) for a subsampled denoising schedule of num_denoising_steps.""" nsteps_train = self.num_base_steps frac_stride = (nsteps_train - 1) / max(1, num_denoising_steps - 1) use_timesteps = torch.round(torch.arange(nsteps_train, device=self.device) * frac_stride).to(torch.long) use_timesteps = torch.clamp(use_timesteps, max=nsteps_train - 1) map_tensor = torch.arange(nsteps_train, device=self.device, dtype=torch.long)[use_timesteps] return use_timesteps, map_tensor def calc_diffusion_vars(self, use_timesteps: torch.Tensor) -> None: """Update buffers (betas, alphas, alphas_cumprod, etc.) 
for the given subsampled timesteps.""" alphas_cumprod = self.alphas_cumprod_base[use_timesteps] last_alpha_cumprod = torch.cat([torch.tensor([1.0]).to(alphas_cumprod), alphas_cumprod[:-1]]) betas = 1.0 - alphas_cumprod / last_alpha_cumprod self.register_buffer("betas", betas, persistent=False) alphas = 1.0 - self.betas self.register_buffer("alphas", alphas, persistent=False) alphas_cumprod = torch.cumprod(self.alphas, dim=0) alphas_cumprod = torch.clamp(alphas_cumprod, min=1e-9) self.register_buffer("alphas_cumprod", alphas_cumprod, persistent=False) alphas_cumprod_prev = torch.cat([torch.tensor([1.0]).to(self.alphas_cumprod), self.alphas_cumprod[:-1]]) self.register_buffer("alphas_cumprod_prev", alphas_cumprod_prev, persistent=False) sqrt_recip_alphas_cumprod = torch.rsqrt(self.alphas_cumprod) self.register_buffer("sqrt_recip_alphas_cumprod", sqrt_recip_alphas_cumprod, persistent=False) sqrt_recipm1_alphas_cumprod = torch.rsqrt(self.alphas_cumprod / (1.0 - self.alphas_cumprod)) self.register_buffer("sqrt_recipm1_alphas_cumprod", sqrt_recipm1_alphas_cumprod, persistent=False) posterior_variance = self.betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod) self.register_buffer("posterior_variance", posterior_variance, persistent=False) sqrt_alphas_cumprod = torch.rsqrt(1.0 / self.alphas_cumprod) self.register_buffer("sqrt_alphas_cumprod", sqrt_alphas_cumprod, persistent=False) sqrt_one_minus_alphas_cumprod = torch.rsqrt(1.0 / (1.0 - self.alphas_cumprod)) self.register_buffer( "sqrt_one_minus_alphas_cumprod", sqrt_one_minus_alphas_cumprod, persistent=False, ) def q_sample( self, x_start: torch.Tensor, t: torch.Tensor, noise: torch.Tensor = None, ): if noise is None: noise = torch.randn_like(x_start) assert noise.shape == x_start.shape xt = ( self.sqrt_alphas_cumprod[t, None, None] * x_start + self.sqrt_one_minus_alphas_cumprod[t, None, None] * noise ) return xt class DDIMSampler(nn.Module): """Deterministic DDIM sampler (eta = 0).""" def __init__(self, diffusion: Diffusion): super().__init__() self.diffusion = diffusion def __call__( self, use_timesteps: torch.Tensor, x_t: torch.Tensor, pred_xstart: torch.Tensor, t: torch.Tensor, ) -> torch.Tensor: self.diffusion.calc_diffusion_vars(use_timesteps) eps = ( self.diffusion.sqrt_recip_alphas_cumprod[t, None, None] * x_t - pred_xstart ) / self.diffusion.sqrt_recipm1_alphas_cumprod[t, None, None] alpha_bar_prev = self.diffusion.alphas_cumprod_prev[t, None, None] x = pred_xstart * torch.sqrt(alpha_bar_prev) + torch.sqrt(1 - alpha_bar_prev) * eps return x ================================================ FILE: kimodo/model/kimodo_model.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0
"""Kimodo model: denoiser, text encoder, diffusion sampling, and post-processing."""

import logging
from typing import Dict, List, Optional, Tuple, Union

import torch
from torch import nn
from tqdm.auto import tqdm

from kimodo.constraints import EndEffectorConstraintSet, FullBodyConstraintSet
from kimodo.motion_rep.feature_utils import compute_heading_angle, length_to_mask
from kimodo.postprocess import post_process_motion
from kimodo.sanitize import sanitize_texts
from kimodo.skeleton import SOMASkeleton30
from kimodo.tools import to_numpy

from .cfg import ClassifierFreeGuidedModel
from .diffusion import DDIMSampler, Diffusion

log = logging.getLogger(__name__)


class Kimodo(nn.Module):
    """Inference-time helper: wraps the denoiser with classifier-free guidance and runs diffusion sampling."""

    def __init__(
        self,
        denoiser: nn.Module,
        text_encoder: nn.Module,
        num_base_steps: int,
        device: Optional[Union[str, torch.device]] = None,
        cfg_type: Optional[str] = "separated",
    ):
        super().__init__()
        self.denoiser = denoiser.eval()

        if cfg_type is None:
            cfg_type = "nocfg"

        # Add Classifier-free guidance to the model if needed
        self.denoiser = ClassifierFreeGuidedModel(self.denoiser, cfg_type=cfg_type)

        self.motion_rep = denoiser.motion_rep
        self.skeleton = self.motion_rep.skeleton
        self.fps = denoiser.motion_rep.fps

        self.diffusion = Diffusion(num_base_steps=num_base_steps)
        self.sampler = DDIMSampler(self.diffusion)
        self.text_encoder = text_encoder
        self.device = device
        self.to(device)

    @property
    def output_skeleton(self):
        """Skeleton used for model output (somaskel77 for SOMA, else unchanged)."""
        if isinstance(self.skeleton, SOMASkeleton30):
            return self.skeleton.somaskel77
        return self.skeleton

    def train(self, mode: bool = True):
        self.denoiser.train(mode)
        return self

    def eval(self):
        self.denoiser.eval()
        return self

    def denoising_step(
        self,
        motion: torch.Tensor,
        pad_mask: torch.Tensor,
        text_feat: torch.Tensor,
        text_pad_mask: torch.Tensor,
        t: torch.Tensor,
        first_heading_angle: Optional[torch.Tensor],
        motion_mask: torch.Tensor,
        observed_motion: torch.Tensor,
        num_denoising_steps: torch.Tensor,
        cfg_weight: Union[float, Tuple[float, float]],
        guide_masks: Optional[Dict] = None,
        cfg_type: Optional[str] = None,
    ) -> torch.Tensor:
        """Single denoising step.

        Returns:
            torch.Tensor: [B, T, D] noisy motion input to step t-1
        """
        # subsample timesteps
        # NOTE: do this at every step due to ONNX export, i.e. num_denoising_steps may change
        # dynamically when running the onnx version, so we need to account for that.
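        # Illustrative numbers (hypothetical): with num_base_steps=1000 and
        # num_denoising_steps=50, frac_stride is 999/49 ~= 20.4, so subsampled
        # steps t = 0, 1, 2, ..., 49 map to base timesteps ~0, 20, 41, ..., 999;
        # map_tensor[t] below selects that base-schedule index for the denoiser.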
        num_denoising_steps = num_denoising_steps[0]
        use_timesteps, map_tensor = self.diffusion.space_timesteps(num_denoising_steps)
        self.diffusion.calc_diffusion_vars(use_timesteps)

        # first compute initial clean prediction from denoiser
        t_map = map_tensor[t]
        with torch.inference_mode():
            pred_clean = self.denoiser(
                cfg_weight,
                motion,
                pad_mask,
                text_feat,
                text_pad_mask,
                t_map,
                first_heading_angle,
                motion_mask,
                observed_motion,
                cfg_type=cfg_type,
            )

        # sampler computes next step noisy motion
        x_tm1 = self.sampler(use_timesteps, motion, pred_clean, t)
        return x_tm1

    def _multiprompt(
        self,
        prompts: list[str],
        num_frames: int | list[int],
        num_denoising_steps: int,
        constraint_lst: Optional[list] = [],
        cfg_weight: Optional[float] = [2.0, 2.0],
        num_samples: Optional[int] = None,
        cfg_type: Optional[str] = None,
        return_numpy: bool = False,
        first_heading_angle: Optional[torch.Tensor] = None,
        # for transitioning
        num_transition_frames: int = 5,
        # for postprocess
        post_processing: bool = False,
        root_margin: float = 0.04,
        # progress bar
        progress_bar=tqdm,
    ) -> torch.Tensor:
        device = self.device

        texts = sanitize_texts(prompts)

        tosqueeze = False
        if num_samples is None:
            num_samples = 1
            tosqueeze = True
        bs = num_samples

        if isinstance(num_frames, int):
            # same duration for all the segments
            num_frames = [num_frames for _ in range(len(texts))]

        if constraint_lst is None:
            constraint_lst = []

        # Generate one chunk at a time
        current_frame = 0
        generated_motions = []
        for idx, (text, num_frame) in enumerate(zip(texts, num_frames)):
            texts_bs = [text for _ in range(num_samples)]
            lengths = torch.tensor(
                [num_frame for _ in range(num_samples)],
                device=device,
            )
            is_first_motion = not generated_motions
            observed_motion, motion_mask = None, None

            # filter the constraint_lst to only keep the relevant ones
            constraint_lst_base = [
                constraint.crop_move(current_frame, current_frame + num_frame) for constraint in constraint_lst
            ]  # this moves them temporally but not spatially
            observed_motion, motion_mask = self.motion_rep.create_conditions_from_constraints_batched(
                constraint_lst_base,
                lengths,
                to_normalize=False,  # don't normalize yet, it needs to be moved around
                device=device,
            )

            if not is_first_motion:
                nb_transition_frames = num_transition_frames
                if nb_transition_frames < 1:
                    raise ValueError(f"num_transition_frames must be at least 1, got {nb_transition_frames}")

                latest_motions = generated_motions.pop()
                # remove the transition part of A (will be put back afterward)
                generated_motions.append(latest_motions[:, :-nb_transition_frames])
                latest_frames = latest_motions[:, -nb_transition_frames:]

                last_output = self.motion_rep.inverse(
                    latest_frames,
                    is_normalized=False,
                    return_numpy=False,
                )
                smooth_root_2d = last_output["smooth_root_pos"][..., [0, 2]]

                # add constraints at the beginning to allow natural transitions
                constraint_lst_transition = []
                for batch_id in range(bs):
                    new_constraint = FullBodyConstraintSet(
                        self.skeleton,
                        torch.arange(num_transition_frames),
                        last_output["posed_joints"][batch_id, :num_transition_frames],
                        last_output["global_rot_mats"][batch_id, :num_transition_frames],
                        smooth_root_2d[batch_id, :num_transition_frames],
                    )
                    # separate end-effector constraint to capture hand/feet rotations
                    new_ee_constraint = EndEffectorConstraintSet(
                        self.skeleton,
                        torch.arange(num_transition_frames),
                        last_output["posed_joints"][batch_id, :num_transition_frames],
                        last_output["global_rot_mats"][batch_id, :num_transition_frames],
                        smooth_root_2d[batch_id, :num_transition_frames],
                        joint_names=["LeftHand", "RightHand", "LeftFoot", "RightFoot"],
                    )
                    constraint_lst_transition.append([new_constraint, new_ee_constraint])
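                # Conditioning sketch (illustrative): with num_transition_frames=5,
                # the first 5 frames of the new segment are constrained to replay the
                # last 5 frames of the previous segment (full-body pose plus hand/foot
                # end-effectors), so the denoiser inpaints a continuation that starts
                # exactly where the previous chunk ended.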
                transition_lengths = torch.tensor(
                    [nb_transition_frames for _ in range(num_samples)],
                    device=device,
                )
                observed_motion_transition, motion_mask_transition = (
                    self.motion_rep.create_conditions_from_constraints_batched(
                        constraint_lst_transition,
                        transition_lengths,
                        to_normalize=False,  # don't normalize yet
                        device=device,
                    )
                )

                # concatenate the observed motion / motion mask
                observed_motion = torch.cat([observed_motion_transition, observed_motion], axis=1)
                motion_mask = torch.cat([motion_mask_transition, motion_mask], axis=1)

                # we need to move each observed motion in the batch to the new starting points
                last_smooth_root_2d = smooth_root_2d[:, 0]
                observed_motion = self.motion_rep.translate_2d(
                    observed_motion, -last_smooth_root_2d
                )  # equivalent to: self.motion_rep.translate_2d_to_zero(observed_motion)

                # remove dummy values after moving
                observed_motion = observed_motion * motion_mask
                lengths = lengths + transition_lengths
                first_heading_angle = compute_heading_angle(last_output["posed_joints"], self.skeleton)[:, 0]
            else:
                if first_heading_angle is None:
                    # Start at 0 angle, but this will change afterward
                    first_heading_angle = torch.tensor([0.0] * bs, device=device)
                else:
                    first_heading_angle = torch.as_tensor(first_heading_angle, device=device)
                    if first_heading_angle.numel() == 1:
                        first_heading_angle = first_heading_angle.repeat(bs)

            observed_motion = self.motion_rep.normalize(observed_motion)

            max_frames = max(lengths)
            motion_pad_mask = length_to_mask(lengths)

            motion = self._generate(
                texts_bs,
                max_frames,
                num_denoising_steps=num_denoising_steps,
                pad_mask=motion_pad_mask,
                first_heading_angle=first_heading_angle,
                motion_mask=motion_mask,
                observed_motion=observed_motion,
                cfg_weight=cfg_weight,
                cfg_type=cfg_type,
            )
            motion = self.motion_rep.unnormalize(motion)

            if not is_first_motion:
                motion_with_transition = self.motion_rep.translate_2d(
                    motion,
                    last_smooth_root_2d,
                )
                if post_processing:
                    # Per-segment postprocessing: inverse, postprocess, re-encode.
                    # The full transition+segment is postprocessed together so the
                    # transition constraints keep the junction smooth.
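                    # Window arithmetic (illustrative, hypothetical values): if
                    # current_frame=120, num_frame=90, and nb_transition_frames=5, the
                    # user constraints below are re-cropped to the absolute window
                    # [115, 210) so their frame indices line up with the
                    # transition+segment tensor that gets post-processed as one piece.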
seg_output = self.motion_rep.inverse( motion_with_transition, is_normalized=False, return_numpy=False, ) seg_constraints = [list(cl) for cl in constraint_lst_transition] for bi in range(bs): seg_constraints[bi].extend( [c.crop_move(current_frame - nb_transition_frames, current_frame - nb_transition_frames + num_frame + nb_transition_frames) for c in constraint_lst] ) corrected = post_process_motion( seg_output["local_rot_mats"], seg_output["root_positions"], seg_output["foot_contacts"], self.skeleton, seg_constraints, root_margin=root_margin, ) seg_output.update(corrected) motion = self.motion_rep( seg_output["local_rot_mats"], seg_output["root_positions"], to_normalize=False, lengths=lengths, ) else: motion = motion_with_transition[:, num_transition_frames:] transition_frames = motion_with_transition[:, :num_transition_frames] # linearly combine the previously generated transitions with the newly generated ones alpha = torch.linspace(1, 0, num_transition_frames, device=device)[:, None] new_transition_frames = ( latest_frames[:, :num_transition_frames] * alpha + (1 - alpha) * transition_frames ) # add new transitions frames for A (merging with B prediction of the history) generated_motions.append(new_transition_frames) elif post_processing: # First segment: postprocess immediately seg_output = self.motion_rep.inverse( motion, is_normalized=False, return_numpy=False, ) seg_constraints = constraint_lst_base if constraint_lst_base else [] corrected = post_process_motion( seg_output["local_rot_mats"], seg_output["root_positions"], seg_output["foot_contacts"], self.skeleton, seg_constraints, root_margin=root_margin, ) seg_output.update(corrected) motion = self.motion_rep( seg_output["local_rot_mats"], seg_output["root_positions"], to_normalize=False, lengths=lengths, ) generated_motions.append(motion) current_frame += num_frame generated_motions = torch.cat(generated_motions, axis=1) # temporal axis (b, t, d) if tosqueeze: generated_motions = generated_motions[0] output = self.motion_rep.inverse( generated_motions, is_normalized=False, return_numpy=False, ) # Post-processing: already applied per-segment inside the loop above, # so no additional post-processing pass is needed here. # Convert SOMA output to somaskel77 for external API if isinstance(self.skeleton, SOMASkeleton30): output = self.skeleton.output_to_SOMASkeleton77(output) # Convert to numpy if requested if return_numpy: output = to_numpy(output) return output def __call__( self, prompts: str | list[str], num_frames: int | list[int], num_denoising_steps: int, multi_prompt: bool = False, constraint_lst: Optional[list] = [], cfg_weight: Optional[float] = [2.0, 2.0], num_samples: Optional[int] = None, cfg_type: Optional[str] = None, return_numpy: bool = False, first_heading_angle: Optional[torch.Tensor] = None, # for transitioning num_transition_frames: int = 5, # for postprocess post_processing: bool = False, root_margin: float = 0.04, # progress bar progress_bar=tqdm, ) -> dict: """Generate motion from text prompts and optional kinematic constraints. When a single prompt/num_frames pair is given, one motion is generated. Passing lists of prompts and/or num_frames produces a batch of independent motions. With ``multi_prompt=True``, the prompts are treated as sequential segments that are generated and stitched together with smooth transitions. Args: prompts: One or more text descriptions of the desired motion. A single string generates one sample; a list generates a batch (or sequential segments when ``multi_prompt=True``). 
num_frames: Duration of the generated motion in frames. Can be a single int applied to every prompt or a per-prompt list. num_denoising_steps: Number of DDIM denoising steps. More steps generally improve quality at the cost of speed. multi_prompt: If ``True``, treat ``prompts`` as an ordered sequence of segments and concatenate them with transitions. constraint_lst: Per-sample list of kinematic constraints (e.g. keyframe poses, end-effector targets, 2-D paths). Pass an empty list for unconstrained generation. cfg_weight: Classifier-free guidance scale(s). A two-element list ``[text_cfg, constraint_cfg]`` controls text and constraint guidance independently. num_samples: Number of samples to generate. cfg_type: Override the default CFG strategy set at init (e.g. ``"separated"``). return_numpy: If ``True``, convert all output tensors to numpy arrays. first_heading_angle: Initial body heading in radians. Shape ``(B,)`` or scalar. Defaults to ``0`` (facing +Z). num_transition_frames: Number of overlapping frames used to blend consecutive segments in multi-prompt mode. post_processing: If ``True``, apply post-processing (foot-skate cleanup and constraint enforcement). root_margin: Horizontal margin (in meters) used by the post-processor to determine when to correct root motion. When root deviates more than margin from the constraint, the post-processor will correct it. progress_bar: Callable wrapping an iterable to display progress (default: ``tqdm``). Pass a no-op to silence output. Returns: dict: A dictionary of motion tensors (or numpy arrays if ``return_numpy=True``) with the following keys: - ``local_rot_mats`` – Local joint rotations as rotation matrices. - ``global_rot_mats`` – Global joint rotations as rotation matrices. - ``posed_joints`` – Joint positions in world space. - ``root_positions`` – Root joint positions. - ``smooth_root_pos`` – Smoothed root trajectory. - ``foot_contacts`` – Boolean foot-contact labels [left heel, left toe, right heel, right toe]. - ``global_root_heading`` – Root heading angle over time. """ device = self.device if multi_prompt: # multi prompt generation return self._multiprompt( prompts, num_frames, num_denoising_steps, constraint_lst, cfg_weight, num_samples, cfg_type, return_numpy, first_heading_angle, num_transition_frames, post_processing, root_margin, progress_bar, ) # Input checking tosqueeze = False if isinstance(prompts, list) and isinstance(num_frames, list): assert len(prompts) == len(num_frames), "The number of prompts should match the number of num_frames." 
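            # Broadcasting rules (illustrative calls, hypothetical values):
            #   model(["walk", "run"], [120, 90], 50)  -> batch of 2, per-prompt lengths
            #   model(["walk", "run"], 120, 50)        -> batch of 2, both 120 frames
            #   model("walk", [120, 90], 50)           -> batch of 2, same prompt twice
            #   model("walk", 120, 50)                 -> one sample, batch dim squeezed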
num_samples = len(prompts) elif isinstance(prompts, list): num_samples = len(prompts) num_frames = [num_frames for _ in range(num_samples)] elif isinstance(num_frames, list): num_samples = len(num_frames) prompts = [prompts for _ in range(num_samples)] else: if num_samples is None: tosqueeze = True num_samples = 1 prompts = [prompts for _ in range(num_samples)] num_frames = [num_frames for _ in range(num_samples)] bs = num_samples texts = sanitize_texts(prompts) lengths = torch.tensor( num_frames, device=device, ) max_frames = max(lengths) motion_pad_mask = length_to_mask(lengths) if first_heading_angle is None: # Start at 0 angle first_heading_angle = torch.tensor([0.0] * bs, device=device) else: first_heading_angle = torch.as_tensor(first_heading_angle, device=device) if first_heading_angle.numel() == 1: first_heading_angle = first_heading_angle.repeat(bs) observed_motion, motion_mask = None, None if constraint_lst: observed_motion, motion_mask = self.motion_rep.create_conditions_from_constraints_batched( constraint_lst, lengths, to_normalize=True, device=device, ) motion = self._generate( texts, max_frames, num_denoising_steps=num_denoising_steps, pad_mask=motion_pad_mask, first_heading_angle=first_heading_angle, motion_mask=motion_mask, observed_motion=observed_motion, cfg_weight=cfg_weight, cfg_type=cfg_type, progress_bar=progress_bar, ) if tosqueeze: motion = motion[0] output = self.motion_rep.inverse( motion, is_normalized=True, return_numpy=False, # Keep as tensor for potential post-processing ) # Apply post-processing if requested if post_processing: corrected = post_process_motion( output["local_rot_mats"], output["root_positions"], output["foot_contacts"], self.skeleton, constraint_lst, root_margin=root_margin, ) # key frame outputs / foot contacts are not changed output.update(corrected) # Convert SOMA output to somaskel77 for external API if isinstance(self.skeleton, SOMASkeleton30): output = self.skeleton.output_to_SOMASkeleton77(output) # Convert to numpy if requested if return_numpy: output = to_numpy(output) return output def _generate( self, texts: List[str], max_frames: int, num_denoising_steps: int, pad_mask: torch.Tensor, first_heading_angle: Optional[torch.Tensor], motion_mask: torch.Tensor, observed_motion: torch.Tensor, cfg_weight: Optional[float] = 2.0, text_feat: Optional[torch.Tensor] = None, text_pad_mask: Optional[torch.Tensor] = None, guide_masks: Optional[Dict] = None, cfg_type: Optional[str] = None, progress_bar=tqdm, ) -> torch.Tensor: """Sample full denoising loop. 
Args: texts (List[str]): batch of text prompts to use for sampling (if text_feat is not passed in) """ device = self.device if text_feat is None: assert text_pad_mask is None log.info("Encoding text...") text_feat, text_length = self.text_encoder(texts) text_feat = text_feat.to(device) # handle empty string (set to zero) empty_text_mask = [len(text.strip()) == 0 for text in texts] text_feat[empty_text_mask] = 0 # Create the pad mask for the text batch_size, maxlen = text_feat.shape[:2] tensor_text_length = torch.tensor(text_length, device=device) tensor_text_length[empty_text_mask] = 0 text_pad_mask = torch.arange(maxlen, device=device).expand(batch_size, maxlen) < tensor_text_length[:, None] if motion_mask is not None: if motion_mask.dtype == torch.bool: motion_mask = 1 * motion_mask batch_size = text_feat.shape[0] # sample loop indices = list(range(num_denoising_steps))[::-1] shape = (batch_size, max_frames, self.motion_rep.motion_rep_dim) cur_mot = torch.randn(shape, device=self.device) num_denoising_steps = torch.tensor( [num_denoising_steps], device=self.device ) # this and t need to be tensor for onnx export # init diffusion with correct num steps before looping use_timesteps = self.diffusion.space_timesteps(num_denoising_steps[0])[0] self.diffusion.calc_diffusion_vars(use_timesteps) for i in progress_bar(indices): t = torch.tensor([i] * cur_mot.size(0), device=self.device) with torch.inference_mode(): cur_mot = self.denoising_step( cur_mot, pad_mask, text_feat, text_pad_mask, t, first_heading_angle, motion_mask, observed_motion, num_denoising_steps, cfg_weight, guide_masks=guide_masks, cfg_type=cfg_type, ) return cur_mot ================================================ FILE: kimodo/model/llm2vec/README.md ================================================ This is a patched version of the original [LLM2Vec](https://github.com/McGill-NLP/llm2vec) codebase so that `McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised` works with `transformers==5.0.0rc3`. ================================================ FILE: kimodo/model/llm2vec/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """LLM2Vec text encoder and wrapper for Kimodo.""" from .llm2vec import LLM2Vec from .llm2vec_wrapper import LLM2VecEncoder __all__ = [ "LLM2Vec", "LLM2VecEncoder", ] ================================================ FILE: kimodo/model/llm2vec/llm2vec.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2024 McGill NLP # SPDX-License-Identifier: MIT # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import json import logging import os from functools import partial from typing import Dict, List, Optional, Union import numpy as np import torch import torch.multiprocessing as mp from peft import PeftModel from torch import Tensor, device, nn from tqdm.autonotebook import tqdm, trange from transformers import ( AutoConfig, AutoModel, AutoTokenizer, GemmaConfig, LlamaConfig, MistralConfig, PretrainedConfig, Qwen2Config, ) logger = logging.getLogger(__name__) def batch_to_device(batch, target_device: device): """Send a pytorch batch to a device (CPU/GPU)""" for key in batch: if isinstance(batch[key], Tensor): batch[key] = batch[key].to(target_device) return batch class LLM2Vec(nn.Module): def __init__( self, model: AutoModel, tokenizer: AutoTokenizer, pooling_mode: str = "mean", max_length: int = 512, doc_max_length: int = 400, skip_instruction: bool = True, ): super().__init__() self.model = model self.tokenizer = tokenizer self.pooling_mode = pooling_mode self.skip_instruction = skip_instruction self.max_length = max_length self.doc_max_length = doc_max_length self.config = model.config @classmethod def _get_model_class(cls, config_class_name, enable_bidirectional): if not enable_bidirectional: return AutoModel if config_class_name == "MistralConfig": from .models.bidirectional_mistral import MistralBiModel return MistralBiModel elif config_class_name == "LlamaConfig": from .models.bidirectional_llama import LlamaBiModel return LlamaBiModel elif config_class_name == "GemmaConfig": from .models.bidirectional_gemma import GemmaBiModel return GemmaBiModel elif config_class_name == "Qwen2Config": from .models.bidirectional_qwen2 import Qwen2BiModel return Qwen2BiModel else: raise ValueError(f"{config_class_name} is not supported yet with bidirectional models.") @classmethod def from_pretrained( cls, base_model_name_or_path, peft_model_name_or_path=None, merge_peft=False, enable_bidirectional=True, **kwargs, ): # pop out encoder args keys = ["pooling_mode", "max_length", "doc_max_length", "skip_instruction"] encoder_args = {key: kwargs.pop(key, None) for key in keys if kwargs.get(key) is not None} tokenizer = AutoTokenizer.from_pretrained(base_model_name_or_path) tokenizer.pad_token = tokenizer.eos_token tokenizer.padding_side = "left" config = AutoConfig.from_pretrained(base_model_name_or_path) config_class_name = config.__class__.__name__ model_class = cls._get_model_class(config_class_name, enable_bidirectional=enable_bidirectional) model = model_class.from_pretrained(base_model_name_or_path, **kwargs) if os.path.isdir(base_model_name_or_path) and 
os.path.exists(f"{base_model_name_or_path}/config.json"): with open(f"{base_model_name_or_path}/config.json", "r") as fIn: config_dict = json.load(fIn) config = PretrainedConfig.from_dict(config_dict) model.config._name_or_path = config._name_or_path # For special case where config.json and adapter weights are in the same directory if hasattr(model, "peft_config"): model = PeftModel.from_pretrained( model, base_model_name_or_path, ) model = model.merge_and_unload() if peft_model_name_or_path is not None: model = PeftModel.from_pretrained( model, peft_model_name_or_path, ) if merge_peft: model = model.merge_and_unload() config = {} config_addr = peft_model_name_or_path if peft_model_name_or_path is not None else base_model_name_or_path if os.path.exists(f"{config_addr}/llm2vec_config.json"): with open(f"{config_addr}/llm2vec_config.json", "r") as fIn: llm2vec_config = json.load(fIn) config.update(llm2vec_config) for key, value in encoder_args.items(): config[key] = value return cls(model=model, tokenizer=tokenizer, **config) def prepare_for_tokenization(self, text): if self.model.config._name_or_path == "meta-llama/Meta-Llama-3-8B-Instruct": text = "<|start_header_id|>user<|end_header_id|>\n\n" + text.strip() + "<|eot_id|>" return text if self.model.config._name_or_path in [ "mistralai/Mistral-7B-Instruct-v0.2", "meta-llama/Llama-2-7b-chat-hf", ]: text = "[INST] " + text.strip() + " [/INST]" if self.model.config._name_or_path in [ "google/gemma-2-9b-it", ]: text = "user\n" + text.strip() + "" if self.model.config._name_or_path in [ "Qwen/Qwen2-1.5B-Instruct", "Qwen/Qwen2-7B-Instruct", ]: text = "<|im_start|>user\n" + text.strip() + "<|im_end|>" if self.pooling_mode == "eos_token": if self.model.config._name_or_path == "meta-llama/Meta-Llama-3-8B": text = text.strip() + "<|end_of_text|>" elif isinstance(self.model.config, LlamaConfig) or isinstance(self.model.config, MistralConfig): text = text.strip() + " " elif isinstance(self.model.config, GemmaConfig): text = text.strip() + "" elif isinstance(self.model.config, Qwen2Config): text = text.strip() + "<|endoftext|>" return text def tokenize(self, texts): texts_2 = [] original_texts = [] for text in texts: t = text.split("!@#$%^&*()") texts_2.append(t[1] if len(t) > 1 else "") original_texts.append("".join(t)) original = self.tokenizer( original_texts, return_tensors="pt", padding=True, truncation=True, max_length=self.max_length, ) embed_mask = None for t_i, t in enumerate(texts_2): ids = self.tokenizer( [t], return_tensors="pt", padding=True, truncation=True, max_length=self.max_length, add_special_tokens=False, ) if embed_mask is None: e_m = torch.zeros_like(original["attention_mask"][t_i]) if len(ids["input_ids"][0]) > 0: e_m[-len(ids["input_ids"][0]) :] = torch.ones(len(ids["input_ids"][0])) embed_mask = e_m.unsqueeze(0) else: e_m = torch.zeros_like(original["attention_mask"][t_i]) if len(ids["input_ids"][0]) > 0: e_m[-len(ids["input_ids"][0]) :] = torch.ones(len(ids["input_ids"][0])) embed_mask = torch.cat((embed_mask, e_m.unsqueeze(0)), dim=0) original["embed_mask"] = embed_mask return original def _skip_instruction(self, sentence_feature): assert sentence_feature["attention_mask"].shape == sentence_feature["embed_mask"].shape sentence_feature["attention_mask"] = sentence_feature["embed_mask"] def forward(self, sentence_feature: Dict[str, Tensor]): embed_mask = None if "embed_mask" in sentence_feature: embed_mask = sentence_feature.pop("embed_mask") reps = self.model(**sentence_feature) sentence_feature["embed_mask"] = embed_mask 
return self.get_pooling(sentence_feature, reps.last_hidden_state) def get_pooling(self, features, last_hidden_states): # All models padded from left assert self.tokenizer.padding_side == "left", "Pooling modes are implemented for padding from left." if self.skip_instruction: self._skip_instruction(features) seq_lengths = features["attention_mask"].sum(dim=-1) if self.pooling_mode == "mean": return torch.stack( [last_hidden_states[i, -length:, :].mean(dim=0) for i, length in enumerate(seq_lengths)], dim=0, ) elif self.pooling_mode == "weighted_mean": bs, l, _ = last_hidden_states.shape complete_weights = torch.zeros(bs, l, device=last_hidden_states.device) for i, seq_l in enumerate(seq_lengths): if seq_l > 0: complete_weights[i, -seq_l:] = torch.arange(seq_l) + 1 complete_weights[i] /= torch.clamp(complete_weights[i].sum(), min=1e-9) return torch.sum(last_hidden_states * complete_weights.unsqueeze(-1), dim=1) elif self.pooling_mode == "eos_token" or self.pooling_mode == "last_token": return last_hidden_states[:, -1] elif self.pooling_mode == "bos_token": return last_hidden_states[features["input_ids"] == self.tokenizer.bos_token_id] else: raise ValueError(f"{self.pooling_mode} is not implemented yet.") def _convert_to_str(self, instruction, text): tokenized_q = self.tokenizer( text, return_tensors="pt", padding=True, truncation=True, max_length=self.max_length, add_special_tokens=False, ) tokenized_q_length = len(tokenized_q["input_ids"][0]) while tokenized_q_length > self.doc_max_length: reduction_ratio = self.doc_max_length / tokenized_q_length reduced_length = int(len(text.split()) * reduction_ratio) text = " ".join(text.split()[:reduced_length]) tokenized_q = self.tokenizer( text, return_tensors="pt", padding=True, truncation=True, max_length=self.max_length, add_special_tokens=False, ) tokenized_q_length = len(tokenized_q["input_ids"][0]) return f"{instruction.strip()} !@#$%^&*(){text}" if instruction else f"!@#$%^&*(){text}" def encode( self, sentences: Union[str, List[str]], batch_size: int = 32, show_progress_bar: bool = True, convert_to_numpy: bool = False, convert_to_tensor: bool = False, device: Optional[str] = None, ): """ Encode a list of sentences to their respective embeddings. The sentences can be a list of strings or a string. Args: sentences: sentence or sentences to encode. batch_size: batch size for turning sentence tokens into embeddings. show_progress_bar: whether to show progress bars during encoding steps. convert_to_numpy: If true, return numpy arrays instead of torch tensors. convert_to_tensor: If true, return torch tensors (default). device: torch backend device identifier (e.g., 'cuda', 'cpu','mps' etc.). If not specified, the default is to use cuda when available, otherwise cpu. Note that only the choice of 'cuda' supports multiprocessing as currently implemented. Returns: embeddings of the sentences. Embeddings are detached and always on the CPU (see _encode implementation). 
""" if isinstance(sentences[0], str) and isinstance(sentences[-1], int): sentences = [sentences] # required for MEDI version of MTEB if isinstance(sentences[0], str): sentences = [[""] + [sentence] for sentence in sentences] if device is None: device = "cuda" if torch.cuda.is_available() else "cpu" concatenated_input_texts = [] for sentence in sentences: assert isinstance(sentence[0], str) assert isinstance(sentence[1], str) concatenated_input_texts.append(self._convert_to_str(sentence[0], sentence[1])) sentences = concatenated_input_texts self.eval() if convert_to_tensor: convert_to_numpy = False length_sorted_idx = np.argsort([-self._text_length(sen) for sen in sentences]) sentences_sorted = [sentences[idx] for idx in length_sorted_idx] all_embeddings = [] if torch.cuda.device_count() <= 1: # This branch also support mps devices self.to(device) for start_index in trange( 0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar, ): sentences_batch = sentences_sorted[start_index : start_index + batch_size] embeddings = self._encode(sentences_batch, device=device, convert_to_numpy=convert_to_numpy) all_embeddings.append(embeddings) else: num_proc = torch.cuda.device_count() cuda_compatible_multiprocess = mp.get_context("spawn") with cuda_compatible_multiprocess.Pool(num_proc) as p: sentences_batches = [ sentences_sorted[start_index : start_index + batch_size] for start_index in range(0, len(sentences), batch_size) ] progress_bar = tqdm( total=len(sentences_batches), desc="Batches", disable=not show_progress_bar, ) results = [] def update(*args): progress_bar.update() for batch in sentences_batches: results.append( p.apply_async( self._encode, args=(batch, None, convert_to_numpy, True), callback=update, ) ) all_embeddings = [result.get() for result in results] progress_bar.close() all_embeddings = torch.cat(all_embeddings, dim=0) all_embeddings = all_embeddings[np.argsort(length_sorted_idx)] all_embeddings = all_embeddings.to(torch.float32) if convert_to_numpy: all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings]) return all_embeddings def save(self, output_path, merge_before_save=False, save_config=True): if merge_before_save and isinstance(self.model, PeftModel): self.model = self.model.merge_and_unload() # Fixes the issue of saving - https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse/discussions/1 if hasattr(self.model, "_hf_peft_config_loaded"): self.model._hf_peft_config_loaded = False self.model.save_pretrained(output_path) self.tokenizer.save_pretrained(output_path) llm2vec_config = { "pooling_mode": self.pooling_mode, "max_length": self.max_length, "doc_max_length": self.doc_max_length, "skip_instruction": self.skip_instruction, } if save_config: os.makedirs(output_path, exist_ok=True) with open(f"{output_path}/llm2vec_config.json", "w") as fOut: json.dump(llm2vec_config, fOut, indent=4) def _encode( self, sentences_batch, device: Optional[str] = None, convert_to_numpy: bool = False, multiprocessing=False, ): if multiprocessing: # multiprocessing only supports CUDA devices at this time, so we ignore the value of device # and use cuda:rank for the device rank = mp.current_process()._identity[0] if device is None and torch.cuda.is_available(): device = f"cuda:{rank % torch.cuda.device_count()}" self.to(device) features = self.tokenize([self.prepare_for_tokenization(sentence) for sentence in sentences_batch]) features = batch_to_device(features, device) with torch.no_grad(): embeddings = self.forward(features) embeddings 
= embeddings.detach() embeddings = embeddings.cpu() return embeddings def _text_length(self, text: Union[List[int], List[List[int]]]): """Helper function to get the length for the input text. Text can be either a string (which means a single text), a list of ints (which means a single tokenized text), or a tuple of list of ints (representing several text inputs to the model). """ if ( isinstance(text, str) or (isinstance(text, list) and isinstance(text[0], int)) or len(text) == 0 ): # Single text, list of ints, or empty return len(text) if isinstance(text, dict): # {key: value} case return len(next(iter(text.values()))) elif not hasattr(text, "__len__"): # Object has no len() method return 1 else: return sum([len(t) for t in text]) def resize_token_embeddings( self, new_num_tokens: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, ) -> nn.Embedding: return self.model.resize_token_embeddings(new_num_tokens=new_num_tokens, pad_to_multiple_of=pad_to_multiple_of) def gradient_checkpointing_enable(self, gradient_checkpointing_kwargs=None): self.model.gradient_checkpointing_enable(gradient_checkpointing_kwargs=gradient_checkpointing_kwargs) ================================================ FILE: kimodo/model/llm2vec/llm2vec_wrapper.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """LLM2Vec encoder wrapper for Kimodo text conditioning.""" import os import numpy as np import torch from .llm2vec import LLM2Vec class LLM2VecEncoder: """LLM2Vec text embeddings.""" def __init__( self, base_model_name_or_path: str, peft_model_name_or_path: str, dtype: str, llm_dim: int, device: str = "auto", ) -> None: torch_dtype = getattr(torch, dtype) self.llm_dim = llm_dim cache_dir = os.environ.get("HUGGINGFACE_CACHE_DIR") if "TEXT_ENCODERS_DIR" in os.environ: base_model_name_or_path = os.path.join(os.environ["TEXT_ENCODERS_DIR"], base_model_name_or_path) peft_model_name_or_path = os.path.join(os.environ["TEXT_ENCODERS_DIR"], peft_model_name_or_path) self.model = LLM2Vec.from_pretrained( base_model_name_or_path=base_model_name_or_path, peft_model_name_or_path=peft_model_name_or_path, torch_dtype=torch_dtype, cache_dir=cache_dir, ) env_device = os.environ.get("TEXT_ENCODER_DEVICE") if env_device: device = env_device if device == "auto": device = "cuda" if torch.cuda.is_available() else "cpu" self._device = device if device is not None: self.model = self.model.to(device) self.model.eval() for p in self.model.parameters(): p.requires_grad = False def to(self, device: torch.device): self.model = self.model.to(device) self._device = str(device) if not isinstance(device, str) else device return self def eval(self): self.model.eval() return self def get_device(self): return self.model.model.device def __call__(self, text: list[str] | str): is_string = False if isinstance(text, str): text = [text] is_string = True with torch.no_grad(): encoded_text = self.model.encode( text, # IMPORTANT: different batch sizes unexpectedly change the output embeddings, so we always set it to 1 # here for repeatability no matter how many texts are being encoded. This # is a fundamental issue with transformers, and is especially bad at lower # precisions (https://github.com/huggingface/transformers/issues/25420#issuecomment-1775317535) # note: this is an internal batch size used by llm2vec - the text list can still be of arbitrary length.
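# (added note: with batch_size=1 every prompt is encoded independently, so the
# result is identical whether texts are submitted one at a time or as a list;
# raising this value would trade that repeatability for throughput.)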
batch_size=1, show_progress_bar=False, device=self._device, ) assert len(encoded_text.shape) assert self.llm_dim == encoded_text.shape[-1] encoded_text = encoded_text[:, None] lengths = np.ones(len(encoded_text), dtype=int).tolist() if is_string: encoded_text = encoded_text[0] lengths = lengths[0] encoded_text = torch.tensor(encoded_text).to(self._device) return encoded_text, lengths ================================================ FILE: kimodo/model/llm2vec/models/__init__.py ================================================ # from .bidirectional_gemma import GemmaBiForMNTP, GemmaBiModel # from .bidirectional_llama import LlamaBiForMNTP, LlamaBiModel # from .bidirectional_mistral import MistralBiForMNTP, MistralBiModel # from .bidirectional_qwen2 import Qwen2BiForMNTP, Qwen2BiModel ================================================ FILE: kimodo/model/llm2vec/models/attn_mask_utils.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2024 McGill NLP # SPDX-License-Identifier: MIT # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. from typing import List, Optional, Tuple, Union import torch from transformers.modeling_attn_mask_utils import AttentionMaskConverter def _prepare_4d_causal_attention_mask( attention_mask: Optional[torch.Tensor], input_shape: Union[torch.Size, Tuple, List], inputs_embeds: torch.Tensor, past_key_values_length: int, sliding_window: Optional[int] = None, ): """Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape `(batch_size, key_value_length)` Args: attention_mask (`torch.Tensor` or `None`): A 2D attention mask of shape `(batch_size, key_value_length)` input_shape (`tuple(int)` or `list(int)` or `torch.Size`): The input shape should be a tuple that defines `(batch_size, query_length)`. inputs_embeds (`torch.Tensor`): The embedded inputs as a torch Tensor. past_key_values_length (`int`): The length of the key value cache. sliding_window (`int`, *optional*): If the model uses windowed attention, a sliding window should be passed. 
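Returns (added summary, inferred from the code below): a 4D additive mask of shape `(batch_size, 1, query_length, key_value_length)` in the dtype of `inputs_embeds`, holding 0 at attended positions and the dtype minimum at masked ones; since the converter is built with `is_causal=False` here, no causal triangle is applied.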
""" attn_mask_converter = AttentionMaskConverter( is_causal=False, sliding_window=sliding_window ) # is_causal=True in original implementation key_value_length = input_shape[-1] + past_key_values_length # 4d mask is passed through the layers if attention_mask is not None and len(attention_mask.shape) == 2: attention_mask = attn_mask_converter.to_4d( attention_mask, input_shape[-1], key_value_length=key_value_length, dtype=inputs_embeds.dtype, ) elif attention_mask is not None and len(attention_mask.shape) == 4: expected_shape = (input_shape[0], 1, input_shape[1], key_value_length) if tuple(attention_mask.shape) != expected_shape: raise ValueError( f"Incorrect 4D attention_mask shape: {tuple(attention_mask.shape)}; expected: {expected_shape}." ) else: # if the 4D mask has correct shape - invert it and fill with negative infinity inverted_mask = 1.0 - attention_mask attention_mask = inverted_mask.masked_fill( inverted_mask.to(torch.bool), torch.finfo(inputs_embeds.dtype).min ) else: attention_mask = attn_mask_converter.to_causal_4d( input_shape[0], input_shape[-1], key_value_length, dtype=inputs_embeds.dtype, device=inputs_embeds.device, ) return attention_mask # Adapted from _prepare_4d_causal_attention_mask def _prepare_4d_causal_attention_mask_for_sdpa( attention_mask: Optional[torch.Tensor], input_shape: Union[torch.Size, Tuple, List], inputs_embeds: torch.Tensor, past_key_values_length: int, sliding_window: Optional[int] = None, ): """Prepares the correct `attn_mask` argument to be used by `torch.nn.functional.scaled_dot_product_attention`. In case no token is masked in the `attention_mask` argument, we simply set it to `None` for the cases `query_length == 1` and `key_value_length == query_length`, and rely instead on SDPA `is_causal` argument to use causal/non-causal masks, allowing to dispatch to the flash attention kernel (that can otherwise not be used if a custom `attn_mask` is passed). """ attn_mask_converter = AttentionMaskConverter( is_causal=False, sliding_window=sliding_window ) # is_causal=True in original implementation key_value_length = input_shape[-1] + past_key_values_length batch_size, query_length = input_shape # torch.jit.trace, symbolic_trace and torchdynamo with fullgraph=True are unable to capture the controlflow `is_causal=attention_mask is None and q_len > 1` # used as an SDPA argument. We keep compatibility with these tracing tools by always using SDPA's `attn_mask` argument in case we are tracing. # TODO: For dynamo, rather use a check on fullgraph=True once this is possible (https://github.com/pytorch/pytorch/pull/120400). is_tracing = ( torch.jit.is_tracing() or isinstance(inputs_embeds, torch.fx.Proxy) or (hasattr(torch, "_dynamo") and torch._dynamo.is_compiling()) ) if attention_mask is not None: # 4d mask is passed through if len(attention_mask.shape) == 4: expected_shape = (input_shape[0], 1, input_shape[1], key_value_length) if tuple(attention_mask.shape) != expected_shape: raise ValueError( f"Incorrect 4D attention_mask shape: {tuple(attention_mask.shape)}; expected: {expected_shape}." ) else: # if the 4D mask has correct shape - invert it and fill with negative infinity inverted_mask = 1.0 - attention_mask.to(inputs_embeds.dtype) attention_mask = inverted_mask.masked_fill( inverted_mask.to(torch.bool), torch.finfo(inputs_embeds.dtype).min ) return attention_mask elif not is_tracing and torch.all(attention_mask == 1): if query_length == 1: # For query_length == 1, causal attention and bi-directional attention are the same. 
attention_mask = None elif key_value_length == query_length: attention_mask = None else: # Unfortunately, for query_length > 1 and key_value_length != query_length, we cannot generally ignore the attention mask, as SDPA causal mask generation # may be wrong. We will set `is_causal=False` in SDPA and rely on Transformers attention_mask instead, hence not setting it to None here. # Reference: https://github.com/pytorch/pytorch/issues/108108 pass elif query_length > 1 and key_value_length != query_length: # See the comment above (https://github.com/pytorch/pytorch/issues/108108). # Ugly: we set it to True here to dispatch in the following controlflow to `to_causal_4d`. attention_mask = True elif is_tracing: raise ValueError( 'Attention using SDPA can not be traced with torch.jit.trace when no attention_mask is provided. To solve this issue, please either load your model with the argument `attn_implementation="eager"` or pass an attention_mask input when tracing the model.' ) if attention_mask is None: expanded_4d_mask = None elif attention_mask is True: expanded_4d_mask = attn_mask_converter.to_causal_4d( input_shape[0], input_shape[-1], key_value_length, dtype=inputs_embeds.dtype, device=inputs_embeds.device, ) else: expanded_4d_mask = attn_mask_converter.to_4d( attention_mask, input_shape[-1], dtype=inputs_embeds.dtype, key_value_length=key_value_length, ) # Attend to all tokens in masked rows from the causal_mask, for example the relevant first rows when # using left padding. This is required by F.scaled_dot_product_attention memory-efficient attention path. # Details: https://github.com/pytorch/pytorch/issues/110213 if not is_tracing and expanded_4d_mask.device.type == "cuda": expanded_4d_mask = AttentionMaskConverter._unmask_unattended( expanded_4d_mask, min_dtype=torch.finfo(inputs_embeds.dtype).min ) return expanded_4d_mask ================================================ FILE: kimodo/model/llm2vec/models/bidirectional_llama.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2024 McGill NLP # SPDX-License-Identifier: MIT # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import torch from peft import PeftModel from torch import nn from transformers import LlamaConfig, LlamaForCausalLM, LlamaModel, LlamaPreTrainedModel from transformers.cache_utils import Cache, StaticCache from transformers.modeling_attn_mask_utils import AttentionMaskConverter from transformers.models.llama.modeling_llama import ( LlamaAttention, LlamaDecoderLayer, # LlamaFlashAttention2, LlamaMLP, LlamaRMSNorm, LlamaRotaryEmbedding, # LlamaSdpaAttention, ) from transformers.utils import logging from .utils import is_transformers_attn_greater_or_equal_4_43_1 logger = logging.get_logger(__name__) class ModifiedLlamaAttention(LlamaAttention): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.is_causal = False # class ModifiedLlamaFlashAttention2(LlamaFlashAttention2): # def __init__(self, *args, **kwargs): # super().__init__(*args, **kwargs) # self.is_causal = False # class ModifiedLlamaSdpaAttention(LlamaSdpaAttention): # def __init__(self, *args, **kwargs): # super().__init__(*args, **kwargs) # self.is_causal = False # LLAMA_ATTENTION_CLASSES = { # "eager": ModifiedLlamaAttention, # "flash_attention_2": ModifiedLlamaFlashAttention2, # "sdpa": ModifiedLlamaSdpaAttention, # } class ModifiedLlamaDecoderLayer(LlamaDecoderLayer): def __init__(self, config: LlamaConfig, layer_idx: int): nn.Module.__init__(self) self.hidden_size = config.hidden_size self.self_attn = ModifiedLlamaAttention(config=config, layer_idx=layer_idx) # self.self_attn = LLAMA_ATTENTION_CLASSES[config._attn_implementation]( # config=config, layer_idx=layer_idx # ) self.mlp = LlamaMLP(config) self.input_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps) self.post_attention_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps) class LlamaBiModel(LlamaModel): _no_split_modules = ["ModifiedLlamaDecoderLayer"] def __init__(self, config: LlamaConfig): if not is_transformers_attn_greater_or_equal_4_43_1(): raise ValueError( "The current implementation of LlamaEncoderModel follows modeling_llama.py of transformers version >= 4.43.1" ) LlamaPreTrainedModel.__init__(self, config) self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx) self.layers = nn.ModuleList( [ModifiedLlamaDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)] ) self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps) self.rotary_emb = LlamaRotaryEmbedding(config=config) self.gradient_checkpointing = False # Initialize weights and apply final processing self.post_init() def _update_causal_mask( self, attention_mask, input_tensor, cache_position, past_key_values: Cache, output_attentions: bool, ): if self.config._attn_implementation == "flash_attention_2": if attention_mask is not None and 0.0 in attention_mask: return attention_mask return None # For SDPA, when possible, we will rely on its `is_causal` argument instead of its `attn_mask` argument, in # order to dispatch on Flash Attention 2. 
This feature is not compatible with static cache, as SDPA will fail # to infer the attention mask. past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 using_static_cache = isinstance(past_key_values, StaticCache) # When output attentions is True, sdpa implementation's forward method calls the eager implementation's forward # if self.config._attn_implementation == "sdpa" and not using_static_cache and not output_attentions: # if AttentionMaskConverter._ignore_causal_mask_sdpa( # attention_mask, # inputs_embeds=input_tensor, # past_key_values_length=past_seen_tokens, # is_training=self.training, # ): # return None dtype, device = input_tensor.dtype, input_tensor.device min_dtype = torch.finfo(dtype).min sequence_length = input_tensor.shape[1] if using_static_cache: target_length = past_key_values.get_max_length() else: target_length = ( attention_mask.shape[-1] if isinstance(attention_mask, torch.Tensor) else past_seen_tokens + sequence_length + 1 ) causal_mask = torch.zeros( (sequence_length, target_length), dtype=dtype, device=device ) # in original implementation - torch.full((sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device) # Commenting out next 2 lines to disable causal masking # if sequence_length != 1: # causal_mask = torch.triu(causal_mask, diagonal=1) causal_mask *= torch.arange(target_length, device=device) > cache_position.reshape(-1, 1) causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1) if attention_mask is not None: causal_mask = causal_mask.clone() # copy to contiguous memory for in-place edit if attention_mask.dim() == 2: mask_length = attention_mask.shape[-1] padding_mask = causal_mask[..., :mask_length].eq(0.0) * attention_mask[:, None, None, :].eq(0.0) causal_mask[..., :mask_length] = causal_mask[..., :mask_length].masked_fill(padding_mask, min_dtype) elif attention_mask.dim() == 4: # backwards compatibility: we allow passing a 4D attention mask shorter than the input length with # cache. In that case, the 4D attention mask attends to the newest tokens only. 
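# (added example: with cache_position starting at 5, sequence_length 3 and a 4D
# mask covering only those 3 new queries, the slice below is written at row
# offset 5 of causal_mask; a mask spanning the full cache keeps offset 0.)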
if attention_mask.shape[-2] < cache_position[0] + sequence_length: offset = cache_position[0] else: offset = 0 mask_shape = attention_mask.shape mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype causal_mask[ : mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3], ] = mask_slice if ( self.config._attn_implementation == "sdpa" and attention_mask is not None and attention_mask.device.type == "cuda" and not output_attentions ): causal_mask = AttentionMaskConverter._unmask_unattended(causal_mask, min_dtype) return causal_mask class LlamaBiForMNTP(LlamaForCausalLM): def __init__(self, config): LlamaPreTrainedModel.__init__(self, config) self.model = LlamaBiModel(config) self.vocab_size = config.vocab_size self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) # Initialize weights and apply final processing self.post_init() # getter for PEFT model def get_model_for_peft(self): return self.model # setter for PEFT model def set_model_for_peft(self, model: PeftModel): self.model = model # save the PEFT model def save_peft_model(self, path): self.model.save_pretrained(path) ================================================ FILE: kimodo/model/llm2vec/models/utils.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2024 McGill NLP # SPDX-License-Identifier: MIT # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. import importlib.metadata from packaging import version from transformers.utils.import_utils import _is_package_available def is_transformers_attn_greater_or_equal_4_43_1(): if not _is_package_available("transformers"): return False return version.parse(importlib.metadata.version("transformers")) >= version.parse("4.43.1") ================================================ FILE: kimodo/model/load_model.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Load Kimodo diffusion models from local checkpoints or Hugging Face.""" from pathlib import Path from typing import Optional from huggingface_hub import snapshot_download from omegaconf import OmegaConf from .loading import ( AVAILABLE_MODELS, DEFAULT_MODEL, DEFAULT_TEXT_ENCODER_URL, MODEL_NAMES, TMR_MODELS, get_env_var, instantiate_from_dict, ) from .registry import get_model_info, resolve_model_name DEFAULT_TEXT_ENCODER = "llm2vec" TEXT_ENCODER_PRESETS = { "llm2vec": { "target": "kimodo.model.LLM2VecEncoder", "kwargs": { "base_model_name_or_path": "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp", "peft_model_name_or_path": "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised", "dtype": "bfloat16", "llm_dim": 4096, "device": "auto", }, } } def _resolve_hf_model_path(modelname: str) -> Path: """Resolve a model name to a local snapshot path via the Hugging Face cache or a download.""" try: repo_id = MODEL_NAMES[modelname] except KeyError: raise ValueError(f"Model '{modelname}' not found. Available models: {list(MODEL_NAMES.keys())}") local_cache = get_env_var("LOCAL_CACHE", "False").lower() == "true" if not local_cache: snapshot_dir = snapshot_download(repo_id=repo_id) # will check online no matter what return Path(snapshot_dir) try: snapshot_dir = snapshot_download(repo_id=repo_id, local_files_only=True) # will check local cache only return Path(snapshot_dir) except Exception: # if local cache is not found, download from online try: snapshot_dir = snapshot_download(repo_id=repo_id) return Path(snapshot_dir) except Exception: raise RuntimeError(f"Could not resolve model '{modelname}' from Hugging Face (repo: {repo_id}).") from None def _build_api_text_encoder_conf(text_encoder_url: str) -> dict: return { "_target_": "kimodo.model.text_encoder_api.TextEncoderAPI", "url": text_encoder_url, } def _build_local_text_encoder_conf(text_encoder_fp32: bool = False) -> dict: text_encoder_name = get_env_var("TEXT_ENCODER", DEFAULT_TEXT_ENCODER) if text_encoder_name not in TEXT_ENCODER_PRESETS: available = ", ".join(sorted(TEXT_ENCODER_PRESETS)) raise ValueError(f"Unknown TEXT_ENCODER='{text_encoder_name}'. Available: {available}") preset = TEXT_ENCODER_PRESETS[text_encoder_name] kwargs = dict(preset["kwargs"]) # copy so a one-off fp32 request does not mutate the module-level preset if text_encoder_fp32: kwargs["dtype"] = "float32" return { "_target_": preset["target"], **kwargs, } def _select_text_encoder_conf(text_encoder_url: str, text_encoder_fp32: bool = False) -> dict: # TEXT_ENCODER_MODE options: # - "api": force TextEncoderAPI # - "local": force local LLM2VecEncoder # - "auto": try API first, fallback to local if unreachable mode = get_env_var("TEXT_ENCODER_MODE", "auto").lower() if mode == "local": return _build_local_text_encoder_conf(text_encoder_fp32) if mode == "api": return _build_api_text_encoder_conf(text_encoder_url) api_conf = _build_api_text_encoder_conf(text_encoder_url) try: text_encoder = instantiate_from_dict(api_conf) # Probe availability early so inference doesn't fail later. text_encoder(["healthcheck"]) return api_conf except Exception as error: print( "Text encoder service is unreachable, falling back to local LLM2Vec " f"encoder. ({type(error).__name__}: {error})" ) return _build_local_text_encoder_conf(text_encoder_fp32) def load_model( modelname=None, device=None, eval_mode: bool = True, default_family: Optional[str] = "Kimodo", return_resolved_name: bool = False, text_encoder=None, text_encoder_fp32: bool = False, ): """Load a kimodo model by name (e.g. 'g1', 'soma'). Resolution of partial/full names (e.g.
Kimodo-SOMA-RP-v1, SOMA) is done inside this function using default_family when the name is not a known short key. Args: modelname: Model identifier; uses DEFAULT_MODEL if None. Can be a short key, a full name (e.g. Kimodo-SOMA-RP-v1), or a partial name; unknown names are resolved via resolve_model_name using default_family. device: Target device for the model (e.g. 'cuda', 'cpu'). eval_mode: If True, set model to eval mode. default_family: Used when modelname is not in AVAILABLE_MODELS to resolve partial names ("Kimodo" for demo/generation, "TMR" for embed script). Default "Kimodo". return_resolved_name: If True, return (model, resolved_short_key). If False, return only the model. text_encoder: Pre-built text encoder to reuse. When provided, skips text encoder selection/instantiation entirely. text_encoder_fp32: If True, uses fp32 for the text encoder rather than the default bfloat16. Returns: Loaded model in eval mode, or (model, resolved short key) if return_resolved_name is True. Raises: ValueError: If modelname is not in AVAILABLE_MODELS and cannot be resolved. FileNotFoundError: If config.yaml is missing in the checkpoint folder. """ if modelname is None: modelname = DEFAULT_MODEL if modelname not in AVAILABLE_MODELS: if default_family is not None: modelname = resolve_model_name(modelname, default_family) else: raise ValueError( f"""The model is not recognized. Please choose between: {AVAILABLE_MODELS}""" ) resolved_modelname = modelname # In case we specify a custom checkpoint directory configured_checkpoint_dir = get_env_var("CHECKPOINT_DIR") if configured_checkpoint_dir: print(f"CHECKPOINT_DIR is set to {configured_checkpoint_dir}, checking the local cache...") # Checkpoint folders are named by display name (e.g. Kimodo-SOMA-RP-v1) info = get_model_info(modelname) checkpoint_folder_name = info.display_name if info is not None else modelname model_path = Path(configured_checkpoint_dir) / checkpoint_folder_name if not model_path.exists() and modelname != checkpoint_folder_name: # Fallback: try short_key for backward compatibility model_path = Path(configured_checkpoint_dir) / modelname if not model_path.exists(): print(f"Model folder not found at '{model_path}', downloading it from Hugging Face...") model_path = _resolve_hf_model_path(modelname) else: # Otherwise, we load the model from the local cache or download it from Hugging Face. model_path = _resolve_hf_model_path(modelname) model_config_path = model_path / "config.yaml" if not model_config_path.exists(): raise FileNotFoundError(f"The model checkpoint folder exists but config.yaml is missing: {model_config_path}") model_conf = OmegaConf.load(model_config_path) if modelname in TMR_MODELS: # Same process at the moment for TMR and Kimodo pass if text_encoder is not None: runtime_conf = OmegaConf.create({"checkpoint_dir": str(model_path)}) else: text_encoder_url = get_env_var("TEXT_ENCODER_URL", DEFAULT_TEXT_ENCODER_URL) runtime_conf = OmegaConf.create( { "checkpoint_dir": str(model_path), "text_encoder": _select_text_encoder_conf(text_encoder_url, text_encoder_fp32), } ) model_cfg = OmegaConf.to_container(OmegaConf.merge(model_conf, runtime_conf), resolve=True) model_cfg.pop("checkpoint_dir", None) if text_encoder is not None: # Prevent Hydra from instantiating a new text encoder; pass None so # Kimodo.__init__ receives a placeholder we replace immediately after.
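# Usage sketch (added, illustrative only): a prebuilt encoder can be shared
# across several models so the LLM is loaded just once:
#     enc = instantiate_from_dict(_build_local_text_encoder_conf())
#     model_a = load_model("g1", device="cuda", text_encoder=enc)
#     model_b = load_model("soma", device="cuda", text_encoder=enc)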
model_cfg["text_encoder"] = None model = instantiate_from_dict(model_cfg, overrides={"device": device}) if text_encoder is not None: model.text_encoder = text_encoder if eval_mode: model = model.eval() if return_resolved_name: return model, resolved_modelname return model ================================================ FILE: kimodo/model/loading.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Model loading utilities: checkpoints, registry, env, and Hydra-based instantiation.""" import os from pathlib import Path from typing import Any, Dict, Optional, Union import torch from hydra.utils import instantiate from omegaconf import OmegaConf from safetensors.torch import load_file as load_safetensors from .registry import ( AVAILABLE_MODELS, DEFAULT_MODEL, DEFAULT_TEXT_ENCODER_URL, KIMODO_MODELS, MODEL_NAMES, TMR_MODELS, ) def get_env_var(name: str, default: Optional[str] = None) -> Optional[str]: """Return environment variable value, or default if unset/empty.""" return os.environ.get(name) or default def instantiate_from_dict( cfg: Dict[str, Any], overrides: Optional[Dict[str, Any]] = None, ): """Instantiate an object from a config dict (e.g. from OmegaConf.to_container). The dict must contain _target_ with a fully qualified class path. Nested configs are instantiated recursively. """ if overrides: cfg = {**cfg, **overrides} conf = OmegaConf.create(cfg) return instantiate(conf) def load_checkpoint_state_dict(ckpt_path: Union[str, Path]) -> dict: """Load a state dict from a checkpoint file. If the checkpoint is a dict with a 'state_dict' key (e.g. PyTorch Lightning), that is returned; otherwise the whole checkpoint is treated as the state dict. Args: ckpt_path: Path to the checkpoint file. Returns: state_dict suitable for model.load_state_dict(). """ ckpt_path = str(ckpt_path) if ckpt_path.endswith(".safetensors"): state_dict = load_safetensors(ckpt_path) else: checkpoint = torch.load(ckpt_path, map_location="cpu", weights_only=False) if isinstance(checkpoint, dict) and "state_dict" in checkpoint: state_dict = checkpoint["state_dict"] elif isinstance(checkpoint, dict): state_dict = checkpoint else: raise ValueError(f"Unsupported checkpoint format: {ckpt_path}") return {key: val.detach().cpu() for key, val in state_dict.items()} __all__ = [ "get_env_var", "instantiate_from_dict", "KIMODO_MODELS", "TMR_MODELS", "AVAILABLE_MODELS", "MODEL_NAMES", "DEFAULT_MODEL", "DEFAULT_TEXT_ENCODER_URL", "load_checkpoint_state_dict", ] ================================================ FILE: kimodo/model/registry.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Registry of model names and Hugging Face repo IDs for Kimodo and TMR. Canonical source of truth is the list of repo IDs. Short keys (e.g. soma-rp) and metadata (dataset, skeleton, version, display name) are derived by parsing. """ import re from dataclasses import dataclass from typing import Optional # Canonical list: repo IDs in the same syntax as Hugging Face (org/Model-Name-v1). # Parser expects: org/Family-SKELETON-DATASET-version (e.g. Kimodo-SOMA-RP-v1). 
KIMODO_REPO_IDS = [ "nvidia/Kimodo-SOMA-RP-v1", "nvidia/Kimodo-SOMA-RP-v1.1", "nvidia/Kimodo-SMPLX-RP-v1", "nvidia/Kimodo-G1-RP-v1", "nvidia/Kimodo-SOMA-SEED-v1", "nvidia/Kimodo-SOMA-SEED-v1.1", "nvidia/Kimodo-G1-SEED-v1", ] TMR_REPO_IDS = [ "nvidia/TMR-SOMA-RP-v1", ] # Repo ID without org, for display (e.g. Kimodo-SOMA-RP-v1). _REPO_NAME_PATTERN = re.compile(r"^(Kimodo|TMR)-([A-Za-z0-9]+)-(RP|SEED)-v(\d+(?:\.\d+)*)$") @dataclass class ModelInfo: """Structured metadata for one model, derived from its repo ID.""" repo_id: str short_key: str family: str skeleton: str dataset: str version: str display_name: str @property def dataset_ui_label(self) -> str: return "Rigplay" if self.dataset == "RP" else "SEED" def _parse_repo_id(repo_id: str) -> Optional[ModelInfo]: """Parse a repo ID into ModelInfo. Returns None if format is unrecognized. """ # repo_id is "org/Model-Name-v1" if "/" in repo_id: _, name = repo_id.split("/", 1) else: name = repo_id m = _REPO_NAME_PATTERN.match(name) if not m: return None family, skeleton, dataset, ver = m.groups() # Normalize skeleton for display (as is for now) skeleton_display = skeleton # Include family so Kimodo-SOMA-RP and TMR-SOMA-RP have distinct keys. short_key = f"{family.lower()}-{skeleton.lower()}-{dataset.lower()}" return ModelInfo( repo_id=repo_id, short_key=short_key, family=family, skeleton=skeleton_display, dataset=dataset, version=f"v{ver}", display_name=name, ) def _version_tuple(v: str) -> tuple[int, ...]: """Parse 'vN' or 'vN.M' into a comparable tuple of ints.""" if v.startswith("v"): parts = v[1:].split(".") if all(p.isdigit() for p in parts): return tuple(int(p) for p in parts) return (0,) def _version_key(info: ModelInfo) -> tuple[int, ...]: return _version_tuple(info.version) def _build_registry() -> tuple[list[ModelInfo], dict[str, str], list[str]]: """Build model infos, short_key -> repo_id map, and list of short keys. When multiple versions exist for the same (family, skeleton, dataset), each ModelInfo gets a version-specific short_key (e.g. kimodo-soma-rp-v1, kimodo-soma-rp-v2) and a versionless alias (kimodo-soma-rp) is added to MODEL_NAMES pointing to the latest version. When only one version exists, the short_key stays versionless (e.g. kimodo-smplx-rp). """ all_repos = KIMODO_REPO_IDS + TMR_REPO_IDS infos: list[ModelInfo] = [] for repo_id in all_repos: info = _parse_repo_id(repo_id) if info is None: raise ValueError(f"Registry repo ID does not match expected pattern: {repo_id}") infos.append(info) # Group by base short_key to detect multi-version families. base_groups: dict[str, list[ModelInfo]] = {} for info in infos: base_groups.setdefault(info.short_key, []).append(info) # For groups with multiple versions, make each short_key version-specific. for base_key, group in base_groups.items(): if len(group) > 1: for info in group: info.short_key = f"{base_key}-{info.version}" # Map each (now unique) short_key to its repo_id. model_names: dict[str, str] = {} for info in infos: model_names[info.short_key] = info.repo_id # Add versionless aliases for multi-version groups, pointing to the latest. for base_key, group in base_groups.items(): if len(group) > 1: latest = max(group, key=_version_key) model_names[base_key] = latest.repo_id return infos, model_names, list(model_names.keys()) MODEL_INFOS, MODEL_NAMES, _SHORT_KEYS = _build_registry() AVAILABLE_MODELS = _SHORT_KEYS # Short-key lists for Kimodo vs TMR (load_model uses TMR_MODELS to branch). 
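# With the registry above this yields, e.g., KIMODO_MODELS containing
# "kimodo-soma-rp-v1", "kimodo-soma-rp-v1.1", "kimodo-smplx-rp", "kimodo-g1-rp",
# "kimodo-soma-seed-v1", "kimodo-soma-seed-v1.1" and "kimodo-g1-seed", while
# TMR_MODELS is just ["tmr-soma-rp"]; versionless aliases live only in MODEL_NAMES.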
KIMODO_MODELS = [info.short_key for info in MODEL_INFOS if info.family == "Kimodo"] TMR_MODELS = [info.short_key for info in MODEL_INFOS if info.family == "TMR"] # Backward compatibility: FRIENDLY_NAMES for any code that still expects it. # Includes versioned short_keys and versionless aliases (latest display name). FRIENDLY_NAMES = {info.short_key: info.display_name for info in MODEL_INFOS} for _key, _repo_id in MODEL_NAMES.items(): if _key not in FRIENDLY_NAMES: for _info in MODEL_INFOS: if _info.repo_id == _repo_id: FRIENDLY_NAMES[_key] = _info.display_name break DEFAULT_MODEL = "kimodo-soma-rp" DEFAULT_TEXT_ENCODER_URL = "http://127.0.0.1:9550/" # Friendly names for skeleton dropdown (key -> label). SKELETON_DISPLAY_NAMES = { "SOMA": "SOMA Human Body", "SMPLX": "SMPLX Human Body", "G1": "Unitree G1 Humanoid Robot", } # Order for skeleton dropdown: SOMA, SMPLX, G1. SKELETON_ORDER = ("SOMA", "SMPLX", "G1") def get_skeleton_display_name(skeleton_key: str) -> str: """Return the UI label for a skeleton key (e.g. SOMA -> SOMA Human Body).""" return SKELETON_DISPLAY_NAMES.get(skeleton_key, skeleton_key) def get_skeleton_key_from_display_name(display_name: str) -> Optional[str]: """Return the skeleton key for a UI label, or None.""" for key, label in SKELETON_DISPLAY_NAMES.items(): if label == display_name: return key return None def get_skeleton_display_names_for_dataset(dataset_ui_label: str, family: Optional[str] = None) -> list[str]: """Return skeleton UI labels for the given dataset. If family is set (e.g. "Kimodo"), only skeletons with a model of that family are included. """ keys = get_skeletons_for_dataset(dataset_ui_label, family=family) return [get_skeleton_display_name(k) for k in keys] def get_short_key(repo_id: str) -> Optional[str]: """Return the short key for a repo ID, or None if not in registry.""" for info in MODEL_INFOS: if info.repo_id == repo_id: return info.short_key return None def get_model_info(short_key: str) -> Optional[ModelInfo]: """Return ModelInfo for a short key, or None if not found. When multiple versions share the same short_key, returns the one used for loading (the latest version), so CHECKPOINT_DIR and HF use the same version. """ repo_id = MODEL_NAMES.get(short_key) if repo_id is None: return None for info in MODEL_INFOS: if info.repo_id == repo_id: return info return None def get_short_key_from_display_name(display_name: str) -> Optional[str]: """Return short_key for a display name (e.g. Kimodo-SOMA-RP-v1), or None.""" for info in MODEL_INFOS: if info.display_name == display_name: return info.short_key return None def get_models_for_demo() -> list[ModelInfo]: """Return all model infos in registry order (for demo model list).""" return list(MODEL_INFOS) def get_datasets(family: Optional[str] = None) -> list[str]: """Return unique dataset UI labels (Rigplay, SEED) present in registry. If family is set (e.g. "Kimodo"), only datasets that have a model of that family are included. """ infos = MODEL_INFOS if family is not None: infos = [i for i in infos if i.family == family] labels = set() for info in infos: labels.add(info.dataset_ui_label) return sorted(labels) def get_skeletons_for_dataset(dataset_ui_label: str, family: Optional[str] = None) -> list[str]: """Return skeleton names that have a model for the given dataset. Order: SOMA, SMPLX, G1 (only those present for the dataset). If family is set (e.g. "Kimodo"), only skeletons with a model of that family are included. 
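For example, with the current registry, get_skeletons_for_dataset("Rigplay") returns ["SOMA", "SMPLX", "G1"] and get_skeletons_for_dataset("SEED") returns ["SOMA", "G1"].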
""" dataset = "RP" if dataset_ui_label == "Rigplay" else "SEED" infos = MODEL_INFOS if family is not None: infos = [i for i in infos if i.family == family] skeletons = set() for info in infos: if info.dataset == dataset: skeletons.add(info.skeleton) return [s for s in SKELETON_ORDER if s in skeletons] def get_versions_for_dataset_skeleton(dataset_ui_label: str, skeleton: str) -> list[str]: """Return version strings (e.g. v1) for the given dataset/skeleton. Sorted by version number so the last element is the highest (e.g. v1, v2). """ dataset = "RP" if dataset_ui_label == "Rigplay" else "SEED" versions = [] for info in MODEL_INFOS: if info.dataset == dataset and info.skeleton == skeleton: versions.append(info.version) return sorted(set(versions), key=_version_tuple) def get_models_for_dataset_skeleton( dataset_ui_label: str, skeleton: str, family: Optional[str] = None ) -> list[ModelInfo]: """Return model infos for the given dataset/skeleton, sorted by version (max first). Used to build the Version dropdown (options = full display names, one per model). If family is set (e.g. "Kimodo"), only models of that family are returned. """ dataset = "RP" if dataset_ui_label == "Rigplay" else "SEED" infos = [info for info in MODEL_INFOS if info.dataset == dataset and info.skeleton == skeleton] if family is not None: infos = [i for i in infos if i.family == family] return sorted(infos, key=_version_key, reverse=True) def resolve_to_short_key(dataset_ui_label: str, skeleton: str, version: str) -> Optional[str]: """Return the short key for (dataset, skeleton, version), or None.""" for info in MODEL_INFOS: if info.dataset_ui_label == dataset_ui_label and info.skeleton == skeleton and info.version == version: return info.short_key return None # ----------------------------------------------------------------------------- # Flexible model name resolution (partial names, case-insensitive, defaults) # ----------------------------------------------------------------------------- _FAMILY_ALIASES = {"kimodo": "Kimodo", "tmr": "TMR"} _DATASET_ALIASES = {"rp": "RP", "rigplay": "RP", "seed": "SEED"} _SKELETON_ALIASES = { "soma": "SOMA", "smplx": "SMPLX", "g1": "G1", } def _normalize_family(s: str) -> Optional[str]: """Return canonical family (Kimodo/TMR) or None if unknown.""" return _FAMILY_ALIASES.get(s.strip().lower()) def _normalize_dataset(s: str) -> Optional[str]: """Return canonical dataset (RP/SEED) or None if unknown.""" return _DATASET_ALIASES.get(s.strip().lower()) def _normalize_skeleton(s: str) -> Optional[str]: """Return canonical skeleton (SOMA/SMPLX/G1) or None if unknown.""" return _SKELETON_ALIASES.get(s.strip().lower()) def _get_latest_for_family_skeleton_dataset(family: str, skeleton: str, dataset: str) -> Optional[ModelInfo]: """Return the model info with the highest version for (family, skeleton, dataset).""" candidates = [ info for info in MODEL_INFOS if info.family == family and info.skeleton == skeleton and info.dataset == dataset ] if not candidates: return None return max(candidates, key=_version_key) def kimodo_short_key_for_skeleton_dataset(skeleton: str, dataset: str) -> Optional[str]: """Return the latest Kimodo model short_key for ``skeleton`` and ``dataset`` (RP/SEED), or None.""" info = _get_latest_for_family_skeleton_dataset("Kimodo", skeleton, dataset) return info.short_key if info is not None else None def registry_skeleton_for_joint_count(nb_joints: int) -> str: """Map motion joint count to registry skeleton key (SOMA / SMPLX / G1).""" if nb_joints == 34: return "G1" if 
nb_joints == 22: return "SMPLX" if nb_joints in (77, 30): return "SOMA" raise ValueError(f"No Kimodo model registered for motion with J={nb_joints}") # Optional version: Family-Skeleton-Dataset-vN or Family-Skeleton-Dataset _RESOLVE_FULL_PATTERN = re.compile( r"^(Kimodo|TMR|kimodo|tmr)[\-_]" r"([A-Za-z0-9]+)[\-_]" r"(RP|SEED|rp|seed)" r"(?:[\-_]v(\d+(?:\.\d+)*))?$", re.IGNORECASE, ) # Partial: Skeleton-Dataset or Skeleton or Dataset (no family) _RESOLVE_PARTIAL_PATTERN = re.compile( r"^([A-Za-z0-9]+)(?:[\-_](RP|SEED|rp|seed))?(?:[\-_]v(\d+(?:\.\d+)*))?$", re.IGNORECASE, ) def resolve_model_name(name: Optional[str], default_family: Optional[str] = None) -> str: """Resolve a user-facing model name to a short_key. Accepts full names (e.g. Kimodo-SOMA-RP-v1), case-insensitive matching, and partial names with defaults: dataset=RP, skeleton=SOMA, family from default_family (Kimodo for demo/generation, TMR for embed script). Omitted version resolves to the latest for that model. Args: name: User-provided name (can be None or empty). default_family: "Kimodo" or "TMR" when name is empty or omits family. Returns: Short key (e.g. kimodo-soma-rp) for use with load_model / MODEL_NAMES. Raises: ValueError: If name cannot be resolved or default_family is missing when needed. """ if name is not None: name = name.strip() if not name: if default_family is None: raise ValueError('Model name is empty; provide a name or set default_family ("Kimodo" or "TMR").') fam = _normalize_family(default_family) if fam is None: raise ValueError(f"default_family must be 'Kimodo' or 'TMR', got {default_family!r}") info = _get_latest_for_family_skeleton_dataset(fam, "SOMA", "RP") if info is None: raise ValueError(f"No model found for {fam}-SOMA-RP. Available: {list(MODEL_NAMES.keys())}") return info.short_key # Exact short_key if name in MODEL_NAMES: return name # Case-insensitive match against short_key or display_name name_lower = name.lower() matches = [] for info in MODEL_INFOS: if name_lower == info.short_key.lower(): matches.append(info) disp = info.display_name.lower() if name_lower == disp or name_lower == ("nvidia/" + disp): matches.append(info) if len(matches) == 1: return matches[0].short_key if len(matches) > 1: return matches[0].short_key # Parsed full form: Family-Skeleton-Dataset or Family-Skeleton-Dataset-vN m = _RESOLVE_FULL_PATTERN.match(name) if m: fam_raw, skel_raw, ds_raw, ver_num = m.groups() fam = _normalize_family(fam_raw) skel = _normalize_skeleton(skel_raw) ds = _normalize_dataset(ds_raw) if fam is not None and skel is not None and ds is not None: if ver_num is not None: version = f"v{ver_num}" for info in MODEL_INFOS: if info.family == fam and info.skeleton == skel and info.dataset == ds and info.version == version: return info.short_key else: info = _get_latest_for_family_skeleton_dataset(fam, skel, ds) if info is not None: return info.short_key # Parsed partial: Skeleton-Dataset, Skeleton, or Dataset (use default_family) if default_family is not None: m = _RESOLVE_PARTIAL_PATTERN.match(name) if m: tok1, ds_raw, ver_num = m.groups() fam = _normalize_family(default_family) if fam is not None: skel = _normalize_skeleton(tok1) ds_candidate = _normalize_dataset(ds_raw) if ds_raw else None if skel is not None and ds_candidate is not None: ds = ds_candidate elif skel is not None: ds = "RP" else: skel = "SOMA" ds = _normalize_dataset(tok1) if tok1 else "RP" if ds is None: ds = "RP" if ver_num is not None: version = f"v{ver_num}" for info in MODEL_INFOS: if ( info.family == fam and info.skeleton == skel 
and info.dataset == ds and info.version == version ): return info.short_key else: info = _get_latest_for_family_skeleton_dataset(fam, skel, ds) if info is not None: return info.short_key # Single token: skeleton or dataset fam = _normalize_family(default_family) if fam is not None: skel = _normalize_skeleton(name) if skel is not None: info = _get_latest_for_family_skeleton_dataset(fam, skel, "RP") if info is not None: return info.short_key ds = _normalize_dataset(name) if ds is not None: info = _get_latest_for_family_skeleton_dataset(fam, "SOMA", ds) if info is not None: return info.short_key raise ValueError( f"Model name {name!r} could not be resolved. " f"Use a short key (e.g. {list(MODEL_NAMES.keys())[:3]}...), " "a full name (e.g. Kimodo-SOMA-RP-v1), or a partial (e.g. SOMA-RP, SOMA) " "with default_family set." ) ================================================ FILE: kimodo/model/text_encoder_api.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Remote text encoder API client (Gradio) for motion generation.""" import logging import numpy as np import torch from gradio_client import Client # Suppress the [httpx] logs (GET requests) logging.getLogger("httpx").setLevel(logging.WARNING) # Suppress internal gradio_client logs logging.getLogger("gradio_client").setLevel(logging.WARNING) class TextEncoderAPI: """Text encoder API client for motion generation.""" def __init__(self, url: str): self.client = Client(url, verbose=False) self.device = "cpu" self.dtype = torch.float def _create_np_random_name(self): import uuid return str(uuid.uuid4()) + ".npy" def to(self, device=None, dtype=None): if device is not None: self.device = device if dtype is not None: self.dtype = dtype return self def __call__(self, texts): """Encode text prompts into tensors. Args: texts (str | list[str]): text prompts to encode Returns: tuple[torch.Tensor, list[int]]: encoded text tensors and their lengths """ if isinstance(texts, str): texts = [texts] tensors = [] lengths = [] for text in texts: filename = self._create_np_random_name() result = self.client.predict( text=text, filename=filename, api_name="/DemoWrapper", ) path = result[0]["value"] tensor = np.load(path) length = tensor.shape[0] tensors.append(tensor) lengths.append(length) padded_tensor = np.zeros((len(lengths), max(lengths), tensors[0].shape[-1]), dtype=tensors[0].dtype) for idx, (tensor, length) in enumerate(zip(tensors, lengths)): padded_tensor[idx, :length] = tensor padded_tensor = torch.from_numpy(padded_tensor) padded_tensor = padded_tensor.to(device=self.device, dtype=self.dtype) return padded_tensor, lengths ================================================ FILE: kimodo/model/tmr.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """TMR model: encoder, and text-to-motion retrieval head.""" import contextlib from pathlib import Path from typing import Dict, List, Optional, Tuple import torch import torch.nn as nn from einops import repeat from torch import Tensor from kimodo.model import load_checkpoint_state_dict from kimodo.motion_rep.feature_utils import length_to_mask from kimodo.sanitize import sanitize_texts from kimodo.skeleton import SkeletonBase, build_skeleton from kimodo.tools import ensure_batched class PositionalEncoding(nn.Module): """Sinusoidal positional encoding for sequences (batch_first optional).""" def __init__(self, d_model, dropout=0.1, max_len=5000, batch_first=False) -> None: super().__init__() self.batch_first = batch_first self.dropout = nn.Dropout(p=dropout) pe = torch.zeros(max_len, d_model) position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) # Note: have to replace torch.exp() and math.log() with torch.pow() # due to MKL exp() and ln() throws floating point exceptions on certain CPUs div_term = torch.pow(10000.0, -torch.arange(0, d_model, 2).float() / d_model) # div_term = torch.exp( # torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model) # ) pe[:, 0::2] = torch.sin(position * div_term) pe[:, 1::2] = torch.cos(position * div_term) pe = pe.unsqueeze(0).transpose(0, 1) self.register_buffer("pe", pe, persistent=False) def forward(self, x: Tensor) -> Tensor: if self.batch_first: x = x + self.pe.permute(1, 0, 2)[:, : x.shape[1], :] else: x = x + self.pe[: x.shape[0], :] return self.dropout(x) def load_ckpt(self, ckpt_path): """Load model weights from checkpoint path.""" state_dict = load_checkpoint_state_dict(ckpt_path) self.load_state_dict(state_dict) class ACTORStyleEncoder(nn.Module): """Motion encoder in ACTOR style: optional motion_rep projection, VAE/MLP tokens, transformer.""" def __init__( self, motion_rep: Optional[nn.Module], llm_shape: Optional[Tuple], vae: bool, latent_dim: int = 256, ff_size: int = 1024, num_layers: int = 4, num_heads: int = 4, dropout: float = 0.1, activation: str = "gelu", ckpt_path: Optional[str] = None, ) -> None: super().__init__() self.motion_rep = motion_rep if motion_rep is not None and llm_shape is None: nfeats = motion_rep.motion_rep_dim elif motion_rep is None and llm_shape is not None: nfeats = llm_shape[-1] else: raise ValueError self.nfeats = nfeats self.projection = nn.Linear(nfeats, latent_dim) self.vae = vae self.nbtokens = 2 if vae else 1 self.tokens = nn.Parameter(torch.randn(self.nbtokens, latent_dim)) self.sequence_pos_encoding = PositionalEncoding(latent_dim, dropout=dropout, batch_first=True) seq_trans_encoder_layer = nn.TransformerEncoderLayer( d_model=latent_dim, nhead=num_heads, dim_feedforward=ff_size, dropout=dropout, activation=activation, batch_first=True, ) self.seqTransEncoder = nn.TransformerEncoder( seq_trans_encoder_layer, num_layers=num_layers, enable_nested_tensor=False, ) if ckpt_path is not None: load_ckpt(self, ckpt_path) def forward(self, x_dict: Dict) -> Tensor: x = x_dict["x"] mask = x_dict["mask"] x = self.projection(x) device = x.device bs = len(x) tokens = repeat(self.tokens, "nbtoken dim -> bs nbtoken dim", bs=bs) xseq = torch.cat((tokens, x), 1) token_mask = torch.ones((bs, self.nbtokens), dtype=bool, device=device) aug_mask = torch.cat((token_mask, mask), 1) # add positional encoding xseq = self.sequence_pos_encoding(xseq) final = self.seqTransEncoder(xseq, src_key_padding_mask=~aug_mask) return final[:, : self.nbtokens] class TMR(nn.Module): 
r"""TMR: Text-to-Motion Retrieval inference code (no decoder) Find more information about the model on the following website: https://mathis.petrovich.fr/tmr """ @classmethod def from_args( cls, motion_rep: nn.Module, llm_shape: tuple | list, vae: bool, latent_dim: int = 256, ff_size: int = 1024, num_layers: int = 4, num_heads: int = 4, dropout: float = 0.1, activation: str = "gelu", ckpt_folder: Optional[str] = None, device: Optional[str] = None, **kwargs, ): motion_encoder, top_text_encoder = None, None motion_encoder = ACTORStyleEncoder( motion_rep=motion_rep, llm_shape=None, vae=vae, latent_dim=latent_dim, ff_size=ff_size, num_layers=num_layers, num_heads=num_heads, dropout=dropout, activation=activation, ckpt_path=Path(ckpt_folder) / "motion_encoder.pt", ).to(device) top_text_encoder = ACTORStyleEncoder( motion_rep=None, llm_shape=llm_shape, vae=vae, latent_dim=latent_dim, ff_size=ff_size, num_layers=num_layers, num_heads=num_heads, dropout=dropout, activation=activation, ckpt_path=Path(ckpt_folder) / "text_encoder.pt", ).to(device) return cls( motion_encoder, top_text_encoder, vae, device=device, **kwargs, ) def __init__( self, motion_encoder: nn.Module, top_text_encoder: nn.Module, vae: bool, text_encoder: Optional = None, fact: Optional[float] = None, sample_mean: Optional[bool] = True, unit_vector: Optional[bool] = False, compute_grads: bool = False, device: Optional[str] = None, ) -> None: super().__init__() self.motion_encoder = motion_encoder self.text_encoder = top_text_encoder self.raw_text_encoder = text_encoder self.motion_rep = None self.skeleton = None if self.motion_encoder is not None: self.motion_rep = self.motion_encoder.motion_rep if self.motion_rep is not None: self.skeleton = self.motion_rep.skeleton self.compute_grads = compute_grads self.device = device # sampling parameters self.vae = vae self.fact = fact if fact is not None else 1.0 self.sample_mean = sample_mean self.unit_vector = unit_vector def full_text_encoder(self, texts: list[str]): assert isinstance(texts, list), "The input should be batched." 
# sanitize the texts first # then encode the text, and then use the top text encoder texts = sanitize_texts(texts) text_feat, text_length = self.raw_text_encoder(texts) if isinstance(text_length, list): text_length = torch.tensor(text_length, device=self.device) else: text_length = text_length.to(self.device) inputs = { "x": text_feat.to(self.device), "mask": length_to_mask(text_length, device=self.device), } return self.text_encoder(inputs) def _find_encoder(self, inputs, modality): assert modality in ["text", "motion", "raw_text", "auto"] if modality == "text": return self.text_encoder elif modality == "motion": return self.motion_encoder elif modality == "raw_text": return self.full_text_encoder if isinstance(inputs[0], str): return self.full_text_encoder m_nfeats = self.motion_encoder.nfeats t_nfeats = self.text_encoder.nfeats if m_nfeats == t_nfeats: raise ValueError("Cannot automatically find the encoder, as they share the same input space.") nfeats = inputs["x"].shape[-1] if nfeats == m_nfeats: return self.motion_encoder elif nfeats == t_nfeats: return self.text_encoder else: raise ValueError("The input is not recognized.") def _encode( self, inputs, modality: str = "auto", sample_mean: Optional[bool] = None, fact: Optional[float] = None, return_distribution: bool = False, unit_vector: Optional[bool] = None, ): sample_mean = self.sample_mean if sample_mean is None else sample_mean fact = self.fact if fact is None else fact unit_vector = self.unit_vector if unit_vector is None else unit_vector # Encode the inputs encoder = self._find_encoder(inputs, modality) encoded = encoder(inputs) # Sampling if self.vae: dists = encoded.unbind(1) mu, logvar = dists if sample_mean: latent_vectors = mu else: # Reparameterization trick std = logvar.exp().pow(0.5) eps = torch.randn_like(std) latent_vectors = mu + fact * eps * std else: dists = None (latent_vectors,) = encoded.unbind(1) if unit_vector: latent_vectors = torch.nn.functional.normalize(latent_vectors, dim=-1) if return_distribution: return latent_vectors, dists return latent_vectors @ensure_batched(posed_joints=4, lengths=1) def encode_motion( self, posed_joints: torch.Tensor, original_skeleton: Optional[SkeletonBase] = None, lengths: Optional[torch.Tensor] = None, unit_vector: Optional[bool] = None, ): # TODO here. convert_ctx = torch.no_grad() if not self.compute_grads else contextlib.nullcontext() if original_skeleton is None: original_skeleton = build_skeleton(posed_joints.shape[-2]) if lengths is None: nbatch, nbframes = posed_joints.shape[:2] device = posed_joints.device assert nbatch == 1, "If lengths is not provided, the input should not be batched."
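# A single unbatched clip: its only valid length is the full frame count.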
lengths = torch.tensor([nbframes], device=device) # slice the posed joints if we use fewer joints skel_slice = self.motion_rep.skeleton.get_skel_slice(original_skeleton) posed_joints = posed_joints[..., skel_slice, :] with convert_ctx: features = self.motion_rep( posed_joints=posed_joints, to_canonicalize=True, to_normalize=True, lengths=lengths, ) mask = length_to_mask(lengths, device=features.device) x_dict = {"x": features, "mask": mask} latent_vectors = self._encode( x_dict, modality="motion", unit_vector=unit_vector, ) return latent_vectors def encode_text( self, x_dict: Dict, unit_vector: Optional[bool] = None, ): # TODO: make it ensure batched convert_ctx = torch.no_grad() if not self.compute_grads else contextlib.nullcontext() with convert_ctx: latent_vectors = self._encode( x_dict, modality="text", unit_vector=unit_vector, ) return latent_vectors def encode_raw_text( self, texts: List[str], unit_vector: Optional[bool] = None, ): is_batched = True if isinstance(texts, str): is_batched = False texts = [texts] convert_ctx = torch.no_grad() if not self.compute_grads else contextlib.nullcontext() with convert_ctx: latent_vectors = self._encode( texts, modality="raw_text", unit_vector=unit_vector, ) if not is_batched: latent_vectors = latent_vectors[0] return latent_vectors ================================================ FILE: kimodo/model/twostage_denoiser.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Two-stage transformer denoiser: root stage then body stage for motion diffusion.""" import contextlib from typing import Optional import torch from torch import nn from .backbone import TransformerEncoderBlock from .loading import load_checkpoint_state_dict class TwostageDenoiser(nn.Module): """Two-stage denoiser: first predicts global root features, then body features conditioned on local root.""" def __init__( self, motion_rep, motion_mask_mode, ckpt_path: Optional[str] = None, **kwargs, ): """Build root and body transformer blocks; optionally load checkpoint from ckpt_path.""" super().__init__() self.motion_rep = motion_rep self.motion_mask_mode = motion_mask_mode # it should be a dual motion_rep # and be global by default # global motion_rep as input input_dim = motion_rep.motion_rep_dim will_concatenate = motion_mask_mode == "concat" # stage 1: root only root_input_dim = input_dim * 2 if will_concatenate else input_dim root_output_dim = motion_rep.global_root_dim self.root_model = TransformerEncoderBlock( input_dim=root_input_dim, output_dim=root_output_dim, skeleton=self.motion_rep.skeleton, **kwargs, ) # replace the global root by the local root local_motion_rep_dim = input_dim - motion_rep.global_root_dim + motion_rep.local_root_dim # stage 2: local body body_input_dim = local_motion_rep_dim + ( input_dim if will_concatenate else 0 ) # body stage always takes in local root info for motion (but still the global mask) body_output_dim = input_dim - motion_rep.global_root_dim self.body_model = TransformerEncoderBlock( input_dim=body_input_dim, output_dim=body_output_dim, skeleton=self.motion_rep.skeleton, **kwargs, ) if ckpt_path: self.load_ckpt(ckpt_path) def load_ckpt(self, ckpt_path: str) -> None: """Load checkpoint from path; state dict keys are stripped of 'denoiser.backbone.'
prefix.""" state_dict = load_checkpoint_state_dict(ckpt_path) state_dict = {key.replace("denoiser.backbone.", ""): val for key, val in state_dict.items()} self.load_state_dict(state_dict) def forward( self, x: torch.Tensor, x_pad_mask: torch.Tensor, text_feat: torch.Tensor, text_feat_pad_mask: torch.Tensor, timesteps: torch.Tensor, first_heading_angle: Optional[torch.Tensor] = None, motion_mask: Optional[torch.Tensor] = None, observed_motion: Optional[torch.Tensor] = None, ) -> torch.Tensor: """ Args: x (torch.Tensor): [B, T, dim_motion] current noisy motion x_pad_mask (torch.Tensor): [B, T] attention mask, positions with True are allowed to attend, False are not text_feat (torch.Tensor): [B, max_text_len, llm_dim] embedded text prompts text_feat_pad_mask (torch.Tensor): [B, max_text_len] attention mask, positions with True are allowed to attend, False are not timesteps (torch.Tensor): [B,] current denoising step motion_mask observed_motion Returns: torch.Tensor: same size as input x """ if self.motion_mask_mode == "concat": if motion_mask is None or observed_motion is None: motion_mask = torch.zeros_like(x) observed_motion = torch.zeros_like(x) x = x * (1 - motion_mask) + observed_motion * motion_mask x_extended = torch.cat([x, motion_mask], axis=-1) else: x_extended = x # Stage 1: predict root motion in global root_motion_pred = self.root_model( x_extended, x_pad_mask, text_feat, text_feat_pad_mask, timesteps, first_heading_angle, ) # [B, T, 5] # Maybe pass this as argument instead of recomputing it lengths = x_pad_mask.sum(-1) # Convert root pred to local rep # At test-time want to allow gradient through for guidance convert_ctx = torch.no_grad() if self.training else contextlib.nullcontext() with convert_ctx: root_motion_local = self.motion_rep.global_root_to_local_root( root_motion_pred, normalized=True, lengths=lengths, ) if self.training: root_motion_local = root_motion_local.detach() # concatenate the predicted local root with the body motion body_x = x[..., self.motion_rep.body_slice] x_new = torch.cat([root_motion_local, body_x], axis=-1) if self.motion_mask_mode == "concat": x_new_extended = torch.cat([x_new, motion_mask], axis=-1) else: x_new_extended = x_new # Stage 2: predict local body motion based on local root predicted_body = self.body_model( x_new_extended, x_pad_mask, text_feat, text_feat_pad_mask, timesteps, first_heading_angle, ) # concatenate the predicted local body with the predicted root output = torch.cat([root_motion_pred, predicted_body], axis=-1) return output ================================================ FILE: kimodo/motion_rep/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Motion representation utilities.""" from .reps import KimodoMotionRep, MotionRepBase, TMRMotionRep __all__ = [ "MotionRepBase", "KimodoMotionRep", "TMRMotionRep", ] ================================================ FILE: kimodo/motion_rep/conditioning.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Constraint conditioning: build index and data dicts from constraint sets for the denoiser.""" from collections import defaultdict import torch def build_condition_dicts(constraints_lst: list): index_dict = defaultdict(list) data_dict = defaultdict(list) for constraint in constraints_lst: constraint.update_constraints(data_dict, index_dict) return index_dict, data_dict def get_unique_index_and_data(indices_lst, data): # unique + sort them by t indices_unique, inverse = torch.unique(indices_lst, dim=0, return_inverse=True) # pick first value for each unique (t, j) first_idx = torch.zeros(indices_unique.size(0), dtype=torch.long, device=inverse.device) first_idx.scatter_(0, inverse, torch.arange(len(inverse), device=inverse.device)) assert (indices_lst[first_idx] == indices_unique).all() # get the data indices_lst = indices_lst[first_idx] data = data[first_idx] return indices_lst, data ================================================ FILE: kimodo/motion_rep/feature_utils.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Motion representation helpers: velocity, heading, masks, and rotation of features.""" from typing import List, Optional, Union import einops import torch from kimodo.geometry import cont6d_to_matrix, matrix_to_cont6d from kimodo.skeleton import SkeletonBase from kimodo.tools import ensure_batched def diff_angles(angles: torch.Tensor, fps: float) -> torch.Tensor: """Compute frame-to-frame angular differences in radians, scaled by fps. Args: angles: [..., T] batched sequences of rotation angles in radians. fps: Sampling rate used to convert frame differences to per-second rate. Returns: [..., T-1] difference between consecutive angles (rad/s). """ cos = torch.cos(angles) sin = torch.sin(angles) cos_diff = cos[..., 1:] * cos[..., :-1] + sin[..., 1:] * sin[..., :-1] sin_diff = sin[..., 1:] * cos[..., :-1] - cos[..., 1:] * sin[..., :-1] # should be close to angles.diff() but more robust # multiply by fps = 1 / dt angles_diff = fps * torch.arctan2(sin_diff, cos_diff) return angles_diff @ensure_batched(positions=4, lengths=1) def compute_vel_xyz( positions: torch.Tensor, fps: float, lengths: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Compute the velocities from positions: dx/dt. Works with batches. The last velocity is duplicated to keep the same size. Args: positions (torch.Tensor): [..., T, J, 3] xyz positions of a human skeleton fps (float): frames per second lengths (Optional[torch.Tensor]): [...] valid length of each batched sequence. If not provided, positions should not be batched Returns: velocity (torch.Tensor): [..., T, J, 3] velocities computed from the positions """ device = positions.device if lengths is None: assert positions.shape[0] == 1, "If lengths is not provided, the input should not be batched."
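# After ensure_batched, positions is [1, T, J, 3]; the single valid length is the frame count T.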
lengths = torch.tensor([positions.shape[1]], device=device) # useful for indexing range_len = torch.arange(len(lengths)) # compute velocities with fps velocity = fps * (positions[:, 1:] - positions[:, :-1]) # padding the velocity vector vel_pad = torch.zeros_like(velocity[:, 0]) velocity, _ = einops.pack([velocity, vel_pad], "batch * nbjoints dim") # repeat the last velocities # with special care for different lengths with batches velocity[(range_len, lengths - 1)] = velocity[(range_len, lengths - 2)] return velocity @ensure_batched(root_rot_angles=2, lengths=1) def compute_vel_angle( root_rot_angles: torch.Tensor, fps: float, lengths: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Compute the local root rotation velocity: dtheta/dt. Args: root_rot_angles (torch.Tensor): [..., T] rotation angle (in radians) fps (float): frames per second lengths (Optional[torch.Tensor]): [...] valid length of each batched sequence. If not provided, root_rot_angles should not be batched Returns: local_root_rot_vel (torch.Tensor): [..., T] local root rotation velocity (in radians/s) """ device = root_rot_angles.device if lengths is None: assert root_rot_angles.shape[0] == 1, "If lengths is not provided, the input should not be batched." lengths = torch.tensor([root_rot_angles.shape[1]], device=device) # useful for indexing range_len = torch.arange(len(lengths)) local_root_rot_vel = diff_angles(root_rot_angles, fps) pad_rot_vel_angles = torch.zeros_like(root_rot_angles[:, 0]) local_root_rot_vel, _ = einops.pack( [local_root_rot_vel, pad_rot_vel_angles], "batch *", ) # repeat the last rotation velocity # with special care for different lengths with batches local_root_rot_vel[(range_len, lengths - 1)] = local_root_rot_vel[(range_len, lengths - 2)] return local_root_rot_vel @ensure_batched(posed_joints=4) def compute_heading_angle(posed_joints: torch.Tensor, skeleton: SkeletonBase) -> torch.Tensor: """Compute the heading direction from joint positions using the hip vector. Args: posed_joints: [B, T, J, 3] global joint positions. skeleton: Skeleton instance used to get hip joint indices. Returns: [B, T] heading angle in radians (one angle per frame). """ # compute root heading for the sequence from hip positions r_hip, l_hip = skeleton.hip_joint_idx diff = posed_joints[:, :, r_hip] - posed_joints[:, :, l_hip] heading_angle = torch.atan2(diff[..., 2], -diff[..., 0]) return heading_angle def length_to_mask( length: Union[torch.Tensor, List], max_len: Optional[int] = None, device=None, ) -> torch.Tensor: """Convert sequence lengths to a boolean validity mask. Args: length: Sequence lengths, either a tensor ``[B]`` or a Python list. max_len: Optional mask width. If omitted, uses ``max(length)``. device: Optional device. When ``length`` is a list, this controls where the new tensor is created. Returns: A boolean tensor of shape ``[B, max_len]`` where ``True`` marks valid timesteps. """ if isinstance(length, list): if device is None: device = "cpu" length = torch.tensor(length, device=device) # Use requested device for output; move length if needed so mask and length match if device is not None: target = torch.device(device) if length.device != target: length = length.to(target) device = length.device if max_len is None: max_len = max(length) mask = torch.arange(max_len, device=device).expand(len(length), max_len) < length.unsqueeze(1) return mask class RotateFeatures: """Helper that applies a global heading rotation to motion features.""" def __init__(self, angle: torch.Tensor): """Precompute 2D and 3D rotation matrices for a batch of angles.
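A usage sketch (names are illustrative): ``rf = RotateFeatures(angle)`` with ``angle`` of shape ``[B]``; ``rf.rotate_positions(points)`` then rotates ``[B, T, J, 3]`` points about the +Y axis, following the ``x' = R x`` convention noted in ``rotate_rotations``.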
Args: angle: Rotation angle(s) in radians, shaped ``[B]``. """ self.angle = angle # Create the necessary rotation matrices cos, sin = torch.cos(angle), torch.sin(angle) one, zero = torch.ones_like(angle), torch.zeros_like(angle) # 2D rotation, transposed (the signs of sin are flipped) self.corrective_mat_2d_T = torch.stack((cos, sin, -sin, cos), -1).reshape(angle.shape + (2, 2)) # 3D rotation on Y axis self.corrective_mat_Y = torch.stack((cos, zero, sin, zero, one, zero, -sin, zero, cos), -1).reshape( angle.shape + (3, 3) ) self.corrective_mat_Y_T = self.corrective_mat_Y.transpose(-2, -1).contiguous() def rotate_positions(self, positions: torch.Tensor): """Rotate 3D positions around the Y axis.""" return positions @ self.corrective_mat_Y_T def rotate_2d_positions(self, positions_2d: torch.Tensor): """Rotate 2D ``(x, z)`` vectors in the ground plane.""" return positions_2d @ self.corrective_mat_2d_T def rotate_rotations(self, rotations: torch.Tensor): """Left-multiply global rotation matrices by the heading correction.""" # "Rotate" the global rotations # which means adding an extra Y rotation after the transform # so at the left: R' = R_y R # (since we use the convention x' = R x) # "bik,btdkj->btdij" B, T, J = rotations.shape[:3] BTJ = B * T * J return ( self.corrective_mat_Y[:, None, None].expand(B, T, J, 3, 3).reshape(BTJ, 3, 3) @ rotations.reshape(BTJ, 3, 3) ).reshape(B, T, J, 3, 3) def rotate_6d_rotations(self, rotations_6d: torch.Tensor): """Rotate 6D rotation features via matrix conversion.""" return matrix_to_cont6d(self.rotate_rotations(cont6d_to_matrix(rotations_6d))) ================================================ FILE: kimodo/motion_rep/feet.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Foot contact detection from joint positions and velocities.""" import torch from ..tools import ensure_batched @ensure_batched(positions=4, velocity=4) def foot_detect_from_pos_and_vel( positions: torch.Tensor, velocity: torch.Tensor, skeleton, vel_thres: float, height_thresh: float, ) -> torch.Tensor: """Compute foot contact labels using heuristics combining joint height and velocities. Args: positions (torch.Tensor): [X, T, J, 3] global joint positions velocity (torch.Tensor): [X, T, J, 3] velocities (already padded correctly), already multiplied by 1 / dt skeleton: Skeleton instance used to get the foot joint indices vel_thres (float): threshold for joint velocity height_thresh (float): threshold for joint height Returns: torch.Tensor: [X, T, 4] contact labels for left and right foot joints (heel/toe order follows the skeleton joint index definition), where ``1`` denotes contact. """ device = positions.device # Use at most 2 foot joints per side (ankle + toe); SOMA77 defines a # third end-effector (ToeEnd) that SOMA30 and other skeletons omit.
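# Heuristic sketch: a foot joint counts as "in contact" on a frame when both its
# speed and its height fall below the thresholds; callers in this repository pass
# vel_thres=0.15 and height_thresh=0.10 (see kimodo_motionrep.py and tmr_motionrep.py).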
fid_l = skeleton.left_foot_joint_idx[:2] fid_r = skeleton.right_foot_joint_idx[:2] velfactor, heightfactor = ( torch.tensor([vel_thres, vel_thres], device=device), torch.tensor([height_thresh, height_thresh], device=device), ) feet_l_v = torch.linalg.norm(velocity[:, :, fid_l], axis=-1) feet_l_h = positions[:, :, fid_l, 1] feet_l = torch.logical_and( feet_l_v < velfactor, feet_l_h < heightfactor, ).to(positions.dtype) feet_r_v = torch.linalg.norm(velocity[:, :, fid_r], axis=-1) feet_r_h = positions[:, :, fid_r, 1] feet_r = torch.logical_and( feet_r_v < velfactor, feet_r_h < heightfactor, ).to(positions.dtype) foot_contacts = torch.cat((feet_l, feet_r), axis=-1) return foot_contacts ================================================ FILE: kimodo/motion_rep/reps/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Motion representation implementations: base, Kimodo, and TMR.""" from .base import MotionRepBase from .kimodo_motionrep import KimodoMotionRep from .tmr_motionrep import TMRMotionRep __all__ = [ "MotionRepBase", "KimodoMotionRep", "TMRMotionRep", ] ================================================ FILE: kimodo/motion_rep/reps/base.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Base motion representation: feature layout, normalization, and conditioning helpers.""" import os from typing import Optional import einops import numpy as np import torch from einops import repeat from ...tools import ensure_batched from ..conditioning import build_condition_dicts from ..feature_utils import compute_vel_angle, compute_vel_xyz from ..stats import Stats def _require_split_stats_layout(stats_path: str) -> None: """Raise if stats_path does not contain the required global_root, local_root, body subdirs.""" subdirs = ("global_root", "local_root", "body") missing = [] for name in subdirs: subpath = os.path.join(stats_path, name) mean_path = os.path.join(subpath, "mean.npy") if not os.path.isfile(mean_path): missing.append(f"{subpath}/ (mean.npy)") if missing: raise FileNotFoundError( f"Checkpoint stats must use the split layout with subfolders " f"global_root/, local_root/, and body/ under '{stats_path}'. " f"Missing or incomplete: {', '.join(missing)}. " ) class MotionRepBase: """Base class for motion representations used in generation and conditioning. Subclasses define: - ``size_dict``: feature blocks and their shapes, - ``last_root_feature``: last entry of the root block, - ``local_root_size_dict``: local-root feature layout, and implement transform-specific methods such as ``__call__``, ``inverse``, ``rotate``, ``translate_2d`` and ``create_conditions``. 
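Example (an illustrative sketch; ``rep`` is a subclass instance and the inputs
follow the shapes documented in its ``__call__``):

    >>> feats = rep(local_joint_rots, root_positions, to_normalize=True, lengths=lengths)
    >>> feats.shape                             # [B, T, rep.motion_rep_dim]
    >>> root_feats = feats[..., rep.root_slice] # global-root block
    >>> body_feats = feats[..., rep.body_slice] # remaining body features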
""" def __init__( self, skeleton, fps, stats_path: Optional[str] = None, ): """Initialize feature slicing metadata and optional normalization stats.""" self.skeleton = skeleton self.fps = fps self.nbjoints = skeleton.nbjoints self.feature_names = list(self.size_dict.keys()) self.ps = list(self.size_dict.values()) self.nfeats_dict = {key: val.numel() for key, val in self.size_dict.items()} feats_cumsum = np.cumsum([0] + list(self.nfeats_dict.values())).tolist() self.slice_dict = {key: slice(feats_cumsum[i], feats_cumsum[i + 1]) for i, key in enumerate(self.feature_names)} self.motion_rep_dim = sum(self.nfeats_dict.values()) self.root_slice = slice(0, self.slice_dict[self.last_root_feature].stop) self.body_slice = slice(self.root_slice.stop, self.motion_rep_dim) self.body_dim = self.body_slice.stop - self.body_slice.start self.global_root_dim = self.root_slice.stop self.local_root_dim = sum(val.numel() for val in self.local_root_size_dict.values()) if stats_path: _require_split_stats_layout(stats_path) self.global_root_stats = Stats(os.path.join(stats_path, "global_root")) self.local_root_stats = Stats(os.path.join(stats_path, "local_root")) self.body_stats = Stats(os.path.join(stats_path, "body")) # Global stats mean = torch.cat([self.global_root_stats.mean, self.body_stats.mean]) std = torch.cat([self.global_root_stats.std, self.body_stats.std]) assert len(mean) == len(std) == self.motion_rep_dim, "There is an stat issue." self.stats = Stats() self.stats.register_from_tensors(mean, std) def get_root_pos(self, features: torch.Tensor, fallback_to_smooth: bool = True): """Extract root positions from a feature tensor. Supports both ``root_pos`` and ``smooth_root_pos`` representations. """ if "root_pos" in self.slice_dict: return features[..., self.slice_dict["root_pos"]] if "smooth_root_pos" not in self.slice_dict: raise TypeError("This motion rep should have either a root_pos or smooth_root_pos field") if fallback_to_smooth: return features[:, :, self.slice_dict["smooth_root_pos"]] # else compute the root pos from the smooth root and local joints offset smooth_root_pos = features[:, :, self.slice_dict["smooth_root_pos"]].clone() local_joints_positions_flatten = features[..., self.slice_dict["local_joints_positions"]] hips_offset = local_joints_positions_flatten[..., self.skeleton.root_idx : self.skeleton.root_idx + 3] root_pos = torch.stack( [ smooth_root_pos[..., 0] + hips_offset[..., 0], smooth_root_pos[..., 1], smooth_root_pos[..., 2] + hips_offset[..., 2], ], axis=-1, ) return root_pos @ensure_batched(root_features=3, lengths=1) def global_root_to_local_root( self, root_features: torch.Tensor, normalized: bool, lengths: Optional[torch.Tensor], ): """Convert global root features to local-root motion features. Args: root_features: Root feature tensor containing root position and global heading, shaped ``[B, T, D_root]``. normalized: Whether ``root_features`` are normalized. lengths: Optional valid lengths per sequence. Returns: Tensor ``[B, T, 4]`` with local root rotational velocity, planar velocity, and global root height. 
""" if normalized: root_features = self.global_root_stats.unnormalize(root_features) [root_pos, global_root_heading] = einops.unpack(root_features, self.ps[:2], "batch time *") cos, sin = global_root_heading.unbind(-1) heading_angle = torch.arctan2(sin, cos) local_root_rot_vel = compute_vel_angle(heading_angle, self.fps, lengths=lengths) local_root_vel = compute_vel_xyz( root_pos[..., None, :], self.fps, lengths=lengths, )[..., 0, [0, 2]] global_root_y = root_pos[..., 1] local_root_motion = torch.cat( [ local_root_rot_vel[..., None], local_root_vel, global_root_y[..., None], ], axis=-1, ) if normalized: local_root_motion = self.local_root_stats.normalize(local_root_motion) return local_root_motion def get_root_heading_angle(self, features: torch.Tensor) -> torch.Tensor: """Compute root heading angle from cosine/sine heading features.""" global_root_heading = features[:, :, self.slice_dict["global_root_heading"]] cos, sin = global_root_heading.unbind(-1) return torch.arctan2(sin, cos) @ensure_batched(features=3) def rotate_to( self, features: torch.Tensor, target_angle: torch.Tensor, return_delta_angle=False, ): """Rotate each sequence so frame-0 heading matches ``target_angle``.""" # rotate so that the first frame angle is the target # it put the motion_rep to the angle current_first_angle = self.get_root_heading_angle(features)[:, 0] delta_angle = target_angle - current_first_angle rotated_features = self.rotate(features, delta_angle) if return_delta_angle: return rotated_features, delta_angle return rotated_features @ensure_batched(features=3) def rotate_to_zero( self, features: torch.Tensor, return_delta_angle=False, ): """Rotate each sequence so frame-0 heading becomes zero.""" target_angle = torch.zeros(len(features), device=features.device) return self.rotate_to(features, target_angle, return_delta_angle=return_delta_angle) @ensure_batched(features=3) def randomize_first_heading( self, features: torch.Tensor, return_delta_angle=False, ) -> torch.Tensor: """Rotate each sequence to a random frame-0 heading.""" target_heading_angle = torch.rand(features.shape[0]) * 2 * np.pi return self.rotate_to( features, target_heading_angle, return_delta_angle=return_delta_angle, ) @ensure_batched(features=3, target_2d_pos=2) def translate_2d_to( self, features: torch.Tensor, target_2d_pos: torch.Tensor, return_delta_pos: bool = False, ) -> torch.Tensor: """Translate each sequence so frame-0 root ``(x, z)`` matches a target.""" root_pos = self.get_root_pos(features) current_first_2d_pos = root_pos[:, 0, [0, 2]].clone() delta_2d_pos = target_2d_pos - current_first_2d_pos translated_features = self.translate_2d(features, delta_2d_pos) if return_delta_pos: return translated_features, delta_2d_pos return translated_features @ensure_batched(features=3) def translate_2d_to_zero( self, features: torch.Tensor, return_delta_pos: bool = False, ) -> torch.Tensor: """Translate each sequence so frame-0 root ``(x, z)`` is at the origin.""" target_2d_pos = torch.zeros(len(features), 2, device=features.device) return self.translate_2d_to(features, target_2d_pos, return_delta_pos=return_delta_pos) @ensure_batched(features=3) def canonicalize(self, features: torch.Tensor, normalized: bool = False): """Canonicalize heading and planar position at frame 0.""" if normalized: features = self.unnormalize(features) rotated_features = self.rotate_to_zero(features) canonicalized_features = self.translate_2d_to_zero(rotated_features) if normalized: canonicalized_features = self.normalize(canonicalized_features) return 
canonicalized_features def normalize(self, features): """Normalize features.""" return self.stats.normalize(features) def unnormalize(self, features): """Undo feature normalization.""" return self.stats.unnormalize(features) def create_conditions_from_constraints( self, constraints_lst: list, length: int, to_normalize: bool, device: str, ): """Create a conditioning tensor and mask from constraint objects.""" index_dict, data_dict = build_condition_dicts(constraints_lst) return self.create_conditions(index_dict, data_dict, length, to_normalize, device) def create_conditions_from_constraints_batched( self, constraints_lst: list | list[list], lengths: torch.Tensor, to_normalize: bool, device: str, ): """Batched version of ``create_conditions_from_constraints``. Supports either one shared constraint list for all batch elements, or a per-sample list of constraint lists. """ num_samples = len(lengths) if not constraints_lst or not isinstance(constraints_lst[0], list): # If no constraints, or constraints are shared across the batch, # build once and repeat. observed_motion, motion_mask = self.create_conditions_from_constraints( constraints_lst, int(lengths.max()), to_normalize, device ) observed_motion = repeat(observed_motion, "t d -> b t d", b=num_samples) motion_mask = repeat(motion_mask, "t d -> b t d", b=num_samples) return observed_motion, motion_mask length = int(lengths.max()) observed_motion_lst = [] motion_mask_lst = [] for constraints_lst_el in constraints_lst: observed_motion, motion_mask = self.create_conditions_from_constraints( constraints_lst_el, length, to_normalize, device, ) observed_motion_lst.append(observed_motion) motion_mask_lst.append(motion_mask) observed_motion = torch.stack(observed_motion_lst, axis=0) motion_mask = torch.stack(motion_mask_lst, axis=0) return observed_motion, motion_mask ================================================ FILE: kimodo/motion_rep/reps/kimodo_motionrep.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Kimodo motion representation: smooth-root global features (local joint positions, 6D global rotations, velocities, and foot contacts).""" from typing import Optional import einops import torch from torch import Tensor from kimodo.tools import to_numpy from ...geometry import cont6d_to_matrix, matrix_to_cont6d from ...skeleton.kinematics import fk from ...skeleton.transforms import global_rots_to_local_rots from ...tools import ensure_batched from ..conditioning import get_unique_index_and_data from ..feature_utils import RotateFeatures, compute_heading_angle, compute_vel_xyz from ..feet import foot_detect_from_pos_and_vel from ..smooth_root import get_smooth_root_pos from .base import MotionRepBase class KimodoMotionRep(MotionRepBase): """Global root / global joints rotations representation, relative to a smooth root.""" def __init__( self, skeleton, fps, stats_path: Optional[str] = None, ): nbjoints = skeleton.nbjoints self.size_dict = { "smooth_root_pos": torch.Size([3]), "global_root_heading": torch.Size([2]), "local_joints_positions": torch.Size([nbjoints, 3]), "global_rot_data": torch.Size([nbjoints, 6]), "velocities": torch.Size([nbjoints, 3]), "foot_contacts": torch.Size([4]), } self.last_root_feature = "global_root_heading" self.local_root_size_dict = { "local_root_rot_vel": torch.Size([1]), "local_root_vel": torch.Size([2]), "global_root_y": torch.Size([1]), } super().__init__(skeleton, fps, stats_path) @ensure_batched(local_joint_rots=5, root_positions=3, lengths=1) def __call__( self, local_joint_rots: torch.Tensor, root_positions: torch.Tensor, to_normalize: bool, to_canonicalize: bool = False, lengths: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Convert local rotations and root trajectory into smooth-root features. Args: local_joint_rots: Local joint rotation matrices ``[B, T, J, 3, 3]``. root_positions: Root positions ``[B, T, 3]``. to_normalize: Whether to normalize output features. to_canonicalize: Whether to canonicalize output features (False by default). lengths: Optional valid lengths for variable-length batches. Returns: Motion features with shape ``[B, T, motion_rep_dim]``. """ device = local_joint_rots.device if lengths is None: assert local_joint_rots.shape[0] == 1, "If lengths is not provided, the input should not be batched."
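# Packed layout per frame (J = nbjoints): 3 (smooth_root_pos) + 2 (global_root_heading)
# + 3J (local_joints_positions) + 6J (global_rot_data) + 3J (velocities)
# + 4 (foot_contacts) = motion_rep_dim.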
lengths = torch.tensor([local_joint_rots.shape[1]], device=device) ( global_joints_rots, global_joints_positions, local_joints_positions_origin_is_pelvis, ) = fk(local_joint_rots, root_positions, self.skeleton) root_heading_angle = compute_heading_angle(global_joints_positions, self.skeleton) global_root_heading = torch.stack([torch.cos(root_heading_angle), torch.sin(root_heading_angle)], dim=-1) smooth_root_pos = get_smooth_root_pos(root_positions) hips_offset = root_positions - smooth_root_pos hips_offset[..., 1] = root_positions[..., 1] # keep the absolute root height in the local offsets local_joints_positions = local_joints_positions_origin_is_pelvis + hips_offset[:, :, None] velocities = compute_vel_xyz(global_joints_positions, self.fps, lengths=lengths) foot_contacts = foot_detect_from_pos_and_vel(global_joints_positions, velocities, self.skeleton, 0.15, 0.10) global_rot_data = matrix_to_cont6d(global_joints_rots) features, _ = einops.pack( [ smooth_root_pos, global_root_heading, local_joints_positions, global_rot_data, velocities, foot_contacts, ], "batch time *", ) if to_canonicalize: features = self.canonicalize(features, normalized=False) if to_normalize: features = self.normalize(features) return features @ensure_batched(features=3, angle=1) def rotate(self, features: torch.Tensor, angle: torch.Tensor): """Rotate root/joint positional and rotational features by heading.""" # assume it is not normalized bs = features.shape[0] device = features.device [ smooth_root_pos, global_root_heading, local_joints_positions, global_rot_data, velocities, foot_contacts, ] = einops.unpack(features, self.ps, "batch time *") if not isinstance(angle, torch.Tensor): angle = torch.tensor(angle, device=device) if len(angle.shape) == 0: angle = angle.repeat(bs) RF = RotateFeatures(angle) new_features, _ = einops.pack( [ RF.rotate_positions(smooth_root_pos), RF.rotate_2d_positions(global_root_heading), RF.rotate_positions(local_joints_positions), RF.rotate_6d_rotations(global_rot_data), RF.rotate_positions(velocities), foot_contacts, ], "batch time *", ) return new_features @ensure_batched(features=3, translation_2d=2) def translate_2d( self, features: torch.Tensor, translation_2d: torch.Tensor, ) -> torch.Tensor: """Translate smooth root planar position by ``(dx, dz)``.""" # only move on the ground # If we need a translate_3D function, we should not forget to move the local_joints_positions as well bs = features.shape[0] if len(translation_2d.shape) == 1: translation_2d = translation_2d.repeat(bs, 1) new_features = features.clone() new_smooth_root_pos = new_features[:, :, self.slice_dict["smooth_root_pos"]] new_smooth_root_pos[:, :, 0] += translation_2d[:, [0]] new_smooth_root_pos[:, :, 2] += translation_2d[:, [1]] return new_features @ensure_batched(features=3) def inverse( self, features: torch.Tensor, is_normalized: bool, posed_joints_from="rotations", return_numpy: bool = False, ) -> dict: """Decode smooth-root features into motion tensors.""" assert posed_joints_from in [ "rotations", "positions", ], "posed_joints_from should be 'rotations' or 'positions'" if is_normalized: features = self.unnormalize(features) [ smooth_root_pos, global_root_heading, local_joints_positions, global_rot_data, velocities, foot_contacts, ] = einops.unpack(features, self.ps, "batch time *") global_rot_mats = cont6d_to_matrix(global_rot_data) local_rot_mats = global_rots_to_local_rots(global_rot_mats, self.skeleton) posed_joints_from_pos = local_joints_positions.clone() posed_joints_from_pos[..., 0] += smooth_root_pos[..., None, 0] posed_joints_from_pos[..., 2] +=
smooth_root_pos[..., None, 2] root_positions = posed_joints_from_pos[..., self.skeleton.root_idx, :] foot_contacts = foot_contacts > 0.5 if posed_joints_from == "rotations": _, posed_joints, _ = self.skeleton.fk( local_rot_mats, root_positions, ) else: posed_joints = posed_joints_from_pos output_tensor_dict = { "local_rot_mats": local_rot_mats, "global_rot_mats": global_rot_mats, "posed_joints": posed_joints, "root_positions": root_positions, "smooth_root_pos": smooth_root_pos, "foot_contacts": foot_contacts, "global_root_heading": global_root_heading, } if return_numpy: return to_numpy(output_tensor_dict) return output_tensor_dict def create_conditions( self, index_dict: dict[Tensor], data_dict: dict[Tensor], length: int, to_normalize: bool, device: str, ): """Build sparse conditioning tensors for smooth-root representation.""" # create empty features and mask to be filled in observed_motion = torch.zeros(length, self.motion_rep_dim, device=device) motion_mask = torch.zeros(length, self.motion_rep_dim, dtype=bool, device=device) def _cat_indices(indices_list: list[Tensor]) -> Tensor: indices = torch.cat([torch.tensor(x) if not isinstance(x, Tensor) else x for x in indices_list]) return indices.to(device=device, dtype=torch.long) def _match_obs_dtype(tensor: Tensor) -> Tensor: return tensor.to(device=device, dtype=observed_motion.dtype) if (fname := "smooth_root_2d") in index_dict and index_dict[fname]: indices = _cat_indices(index_dict[fname]) indices, smooth_root_2d = get_unique_index_and_data(indices, torch.cat(data_dict[fname])) smooth_root_2d = _match_obs_dtype(smooth_root_2d) f_sliced = observed_motion[:, self.slice_dict["smooth_root_pos"]] f_sliced[indices, 0] = smooth_root_2d[:, 0] f_sliced[indices, 2] = smooth_root_2d[:, 1] m_sliced = motion_mask[:, self.slice_dict["smooth_root_pos"]] m_sliced[indices, 0] = True m_sliced[indices, 2] = True if (fname := "root_y_pos") in index_dict and index_dict[fname]: indices = _cat_indices(index_dict[fname]) indices, root_pos_Y = get_unique_index_and_data(indices, torch.cat(data_dict[fname])) root_pos_Y = _match_obs_dtype(root_pos_Y) f_sliced = observed_motion[:, self.slice_dict["smooth_root_pos"]] f_sliced[indices, 1] = root_pos_Y m_sliced = motion_mask[:, self.slice_dict["smooth_root_pos"]] m_sliced[indices, 1] = True if (fname := "global_root_heading") in index_dict and index_dict[fname]: indices = _cat_indices(index_dict[fname]) indices, global_root_heading = get_unique_index_and_data(indices, torch.cat(data_dict[fname])) global_root_heading = _match_obs_dtype(global_root_heading) f_sliced = observed_motion[:, self.slice_dict[fname]] f_sliced[indices] = global_root_heading m_sliced = motion_mask[:, self.slice_dict[fname]] m_sliced[indices] = True if (fname := "global_joints_rots") in index_dict and index_dict[fname]: indices_lst = _cat_indices(index_dict[fname]) indices_lst, global_joints_rots = get_unique_index_and_data(indices_lst, torch.cat(data_dict[fname])) global_joints_rots = _match_obs_dtype(global_joints_rots) global_rot_data = matrix_to_cont6d(global_joints_rots) f_sliced = observed_motion[:, self.slice_dict["global_rot_data"]] masking = torch.zeros(len(f_sliced) * self.nbjoints, 6, device=device, dtype=bool) masking[indices_lst.T[0] * self.nbjoints + indices_lst.T[1]] = True masking = masking.reshape(len(f_sliced), self.nbjoints * 6) f_sliced[masking] = global_rot_data.flatten() m_sliced = motion_mask[:, self.slice_dict["global_rot_data"]] m_sliced[masking] = True if (fname := "global_joints_positions") in index_dict and 
index_dict[fname]: indices_lst = _cat_indices(index_dict[fname]) indices_lst, global_joints_positions = get_unique_index_and_data(indices_lst, torch.cat(data_dict[fname])) global_joints_positions = _match_obs_dtype(global_joints_positions) T_indices = indices_lst[:, 0].contiguous() _test = motion_mask[T_indices, self.slice_dict["smooth_root_pos"]] if not _test[:, [0, 2]].all(): raise ValueError("For constraining global positions, the smooth root should also be constrained.") smooth_root_pos = observed_motion[T_indices, self.slice_dict["smooth_root_pos"]].clone() local_reference = smooth_root_pos.clone() local_reference[..., 1] = 0.0 local_joints_positions = global_joints_positions - local_reference f_sliced = observed_motion[:, self.slice_dict["local_joints_positions"]] masking = torch.zeros(len(f_sliced) * self.nbjoints, 3, device=device, dtype=bool) masking[indices_lst.T[0] * self.nbjoints + indices_lst.T[1]] = True masking = masking.reshape(len(f_sliced), self.nbjoints * 3) f_sliced[masking] = local_joints_positions.flatten() m_sliced = motion_mask[:, self.slice_dict["local_joints_positions"]] m_sliced[masking] = True if to_normalize: observed_motion = self.normalize(observed_motion) return observed_motion, motion_mask ================================================ FILE: kimodo/motion_rep/reps/tmr_motionrep.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """TMR motion representation: global root, global joints, velocities, and foot contacts.""" from typing import Optional import einops import torch from ...skeleton.kinematics import fk from ...tools import ensure_batched, to_numpy from ..feature_utils import RotateFeatures, compute_heading_angle, compute_vel_xyz from ..feet import foot_detect_from_pos_and_vel from .base import MotionRepBase class TMRMotionRep(MotionRepBase): """Motion representation with global root and local joint positions. The local joint positions are rotation invariant (they all face z+) Feature layout: - root position ``(x, y, z)`` - root heading as ``(cos(theta), sin(theta))`` - local joint positions (root and rotation removed) - local joint velocities (rotation removed) - binary foot contacts """ def __init__( self, skeleton, fps, stats_path: Optional[str] = None, ): nbjoints = skeleton.nbjoints self.size_dict = { "root_pos": torch.Size([3]), "global_root_heading": torch.Size([2]), "local_joints_positions": torch.Size([nbjoints - 1, 3]), "velocities": torch.Size([nbjoints, 3]), "foot_contacts": torch.Size([4]), } self.last_root_feature = "global_root_heading" self.local_root_size_dict = { "local_root_rot_vel": torch.Size([1]), "local_root_vel": torch.Size([2]), "global_root_y": torch.Size([1]), } super().__init__(skeleton, fps, stats_path) @ensure_batched(local_joint_rots=5, root_positions=3, posed_joints=4, lengths=1) def __call__( self, local_joint_rots: Optional[torch.Tensor] = None, root_positions: Optional[torch.Tensor] = None, posed_joints: Optional[torch.Tensor] = None, *, to_normalize: bool, to_canonicalize: bool = False, lengths: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Convert motion inputs to this feature representation. Args: local_joint_rots: Local joint rotation matrices ``[B, T, J, 3, 3]``. Required when ``posed_joints`` is not provided. root_positions: Root translations ``[B, T, 3]``. Required when ``posed_joints`` is not provided. 
posed_joints: Optional precomputed global joint positions ``[B, T, J, 3]``. If passed, FK is skipped. to_normalize: Whether to normalize output features. to_canonicalize: Whether to canonicalize output features (False by default). lengths: Optional valid lengths for variable-length batches. Returns: Motion features with shape ``[B, T, motion_rep_dim]``. """ if posed_joints is not None: device = posed_joints.device nbatch, nbframes, nbjoints = posed_joints.shape[:3] else: device = local_joint_rots.device nbatch, nbframes, nbjoints = local_joint_rots.shape[:3] if lengths is None: assert nbatch == 1, "If lengths is not provided, the input should not be batched." lengths = torch.tensor([nbframes], device=device) if posed_joints is None: _, global_positions, local_joints_positions_origin_is_pelvis = fk( local_joint_rots, root_positions, self.skeleton ) else: global_positions = posed_joints root_positions = posed_joints[:, :, 0] local_joints_positions_origin_is_pelvis = posed_joints - root_positions[:, :, None] root_heading_angle = compute_heading_angle(global_positions, self.skeleton) global_root_heading = torch.stack([torch.cos(root_heading_angle), torch.sin(root_heading_angle)], dim=-1) ground_offset = torch.zeros_like(root_positions) ground_offset[..., 1] = root_positions[..., 1] local_joints_positions = local_joints_positions_origin_is_pelvis[:, :, 1:] + ground_offset[:, :, None] velocities = compute_vel_xyz(global_positions, self.fps, lengths=lengths) # Remove the heading angle for each frame RF = RotateFeatures(-root_heading_angle) local_joints_positions = RF.rotate_positions(local_joints_positions) velocities = RF.rotate_positions(velocities) foot_contacts = foot_detect_from_pos_and_vel(global_positions, velocities, self.skeleton, 0.15, 0.10) features, _ = einops.pack( [ root_positions, global_root_heading, local_joints_positions, velocities, foot_contacts, ], "batch time *", ) if to_canonicalize: features = self.canonicalize(features, normalized=False) if to_normalize: features = self.normalize(features) return features @ensure_batched(features=3, angle=1) def rotate(self, features: torch.Tensor, angle: torch.Tensor): """Rotate all spatial features by a heading delta (radians).""" # rotate by the angle # it adds the angle to the current features # assume it is not normalized bs = features.shape[0] device = features.device [ root_pos, global_root_heading, local_joints_positions, velocities, foot_contacts, ] = einops.unpack(features, self.ps, "batch time *") if not isinstance(angle, torch.Tensor): angle = torch.tensor(angle, device=device) if len(angle.shape) == 0: angle = angle.repeat(bs) RF = RotateFeatures(angle) new_features, _ = einops.pack( [ RF.rotate_positions(root_pos), RF.rotate_2d_positions(global_root_heading), local_joints_positions, # already rotation invariant velocities, # already rotation invariant foot_contacts, ], "batch time *", ) return new_features @ensure_batched(features=3, translation_2d=2) def translate_2d( self, features: torch.Tensor, translation_2d: torch.Tensor, ) -> torch.Tensor: """Translate root planar position by ``(dx, dz)``.""" # only move on the ground # For 3D, we should not forget to move the local_joints_positions as well bs = features.shape[0] if len(translation_2d.shape) == 1: translation_2d = translation_2d.repeat(bs, 1) new_features = features.clone() new_root_pos = new_features[:, :, self.slice_dict["root_pos"]] new_root_pos[:, :, 0] += translation_2d[:, 0] new_root_pos[:, :, 2] += translation_2d[:, 1] return new_features @ensure_batched(features=3) def
inverse( self, features: torch.Tensor, is_normalized: bool, posed_joints_from="positions", return_numpy: bool = False, ) -> dict: """Decode features back to a motion dictionary. Args: features: Feature tensor ``[B, T, D]``. is_normalized: Whether input features are normalized. posed_joints_from: Must be ``"positions"`` for this representation. return_numpy: Whether to convert tensors to numpy arrays. Returns: Dictionary containing reconstructed positions and auxiliary data. """ assert posed_joints_from == "positions" if is_normalized: features = self.unnormalize(features) [ root_positions, global_root_heading, local_joints_positions, velocities, foot_contacts, ] = einops.unpack(features, self.ps, "batch time *") dummy_root = torch.zeros_like(local_joints_positions[:, :, [0]]) posed_joints_from_pos = torch.cat([dummy_root, local_joints_positions], axis=2) # prepend the root joint posed_joints_from_pos[..., 0] += root_positions[..., None, 0] posed_joints_from_pos[..., 2] += root_positions[..., None, 2] root_positions = posed_joints_from_pos[..., self.skeleton.root_idx, :] foot_contacts = foot_contacts > 0.5 posed_joints = posed_joints_from_pos output_tensor_dict = { "local_rot_mats": None, "global_rot_mats": None, "posed_joints": posed_joints, "root_positions": root_positions, "foot_contacts": foot_contacts, "global_root_heading": global_root_heading, } if return_numpy: return to_numpy(output_tensor_dict) return output_tensor_dict ================================================ FILE: kimodo/motion_rep/smooth_root.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Smooth root trajectory: ADMM-based smoother with margin constraints and get_smooth_root_pos helper.""" import math import numpy as np import torch from scipy import sparse from scipy.sparse.linalg import splu from kimodo.tools import ensure_batched class TrajectorySmoother: """Modify trajectories to hit target values while respecting soft constraints. This smoother keeps the trajectory close to the original positions while minimizing accelerations. Targets are enforced at specified frames via soft constraints. """ def __init__( self, margins, pos_weight=0.0, loop=False, admm_iters=100, alpha_overrelax=1.0, circle_project=False, ): """Initialize the TrajectorySmoother. Args: margins: Array of margin values for each frame.
margins[i] < 0: unconstrained margins[i] == 0: pinned on this frame margins[i] > 0: can deviate within the margin pos_weight: Weight for position preservation loop: Whether the trajectory should loop admm_iters: Number of ADMM iterations alpha_overrelax: ADMM over-relaxation coefficient circle_project: Whether to project each solution vector toward the unit circle """ self.pos_weight = pos_weight self.admm_iters = admm_iters self.alpha_overrelax = alpha_overrelax self.circle_project = circle_project N = len(margins) # Store margin information as numpy arrays self.margin_vals = margins # Build acceleration matrix A a_data = [] a_rows = [] a_cols = [] for i in range(1, N - 1): scale = 1.0 a_data.extend([-scale, 2.0 * scale, -scale]) a_rows.extend([i, i, i]) a_cols.extend([i - 1, i, i + 1]) if loop: # Add periodic accelerations scale = 1.0 a_data.extend([-scale, 2.0 * scale, -scale]) a_rows.extend([0, 0, 0]) a_cols.extend([N - 1, 0, 1]) scale = 1.0 a_data.extend([-scale, 2.0 * scale, -scale]) a_rows.extend([N - 1, N - 1, N - 1]) a_cols.extend([N - 2, N - 1, 0]) A = sparse.csr_matrix((a_data, (a_rows, a_cols)), shape=(N, N)) # Build identity matrix identity_matrix = sparse.eye(N) # Build system matrix M M = pos_weight * identity_matrix + A.T @ A # Calculate ADMM step size diag_max = max(abs(M.diagonal())) self.admm_stepsize = 0.25 * np.sqrt(diag_max) M = M + self.admm_stepsize * identity_matrix self.system_lu = splu(M.tocsc()) def smooth(self, targets, x0): """Interpolate between reference positions while satisfying constraints. Args: targets: Target positions for constrained frames (numpy array) x0: Initial guess defining the original trajectory shape (numpy array) Returns: Interpolated positions (numpy array) """ x_target = targets.copy() x = x0.copy() z = np.zeros_like(x) u = np.zeros_like(x) for _ in range(self.admm_iters): self.z_update(z, x, x_target, u) self.u_update(u, x, z) self.x_update(x, z, u, x_target) return x def x_update(self, x, z, u, x_t): """Update x in the ADMM iteration.""" # x = (wp * I + A^T A + p I)^-1 (wp * x_orig + p (z - u)) r = self.pos_weight * x_t + self.admm_stepsize * (z - u) x[:] = self.system_lu.solve(r) def z_update(self, z, x, z_t, u): """Update z in the ADMM iteration using vectorized operations.""" # Compute the difference from target for all margin locations at once z[:] = x + u - z_t # Check if we need to project back to margin z_diff_norms = np.linalg.norm(z, axis=1) mask = z_diff_norms > self.margin_vals if np.any(mask): scale_factors = self.margin_vals[mask] / z_diff_norms[mask] z[mask] *= scale_factors[:, np.newaxis] # Add back the target z[:] += z_t if self.circle_project: z[:] = z / (np.linalg.norm(z, axis=1, keepdims=True) + 1.0e-6) def u_update(self, u, x, z): """Update u in the ADMM iteration using vectorized operations.""" u[:] += self.alpha_overrelax * (x - z) def smooth_signal(x, margins, pos_weight=0, alpha_overrelax=1.8, admm_iters=500, circle_project=False): """Multigrid trajectory smoothing with margin constraints. Args: x: Input trajectory ``[T, D]`` as a NumPy array. margins: Allowed radius around each target frame ``[T]``. pos_weight: Weight for staying close to the original signal. alpha_overrelax: ADMM over-relaxation coefficient. admm_iters: ADMM iterations per multigrid level. circle_project: If ``True``, project each vector to the unit sphere. Returns: Smoothed trajectory of shape ``[T, D]``. """ x_smoothed = x.copy() x_smoothed[:] = x.mean(axis=0, keepdims=True) # smooth the signal, multigrid style by starting out coarse, # doubling the resolution and repeating until we're at the full # resolution, using the previous result as the initial guess.
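# Illustrative schedule: for a 300-frame trajectory, floor(log2(300)) = 8, so
# levels = max(8 - 4, 1) = 4 and stepsize starts at 2**4 = 16, halving each pass:
# 16, 8, 4, 2, 1 (the final pass smooths at full resolution).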
levels = int(math.floor(math.log2(len(x)))) levels = max(levels - 4, 1) stepsize = 2**levels while True: # smooth signals at this level: num_steps = len(x_smoothed[::stepsize]) smoother = TrajectorySmoother( margins=margins[::stepsize], pos_weight=pos_weight, alpha_overrelax=alpha_overrelax, admm_iters=admm_iters, circle_project=circle_project, ) x_smoothed[::stepsize] = smoother.smooth(x[::stepsize], x_smoothed[::stepsize]) # interpolate to next level: next_stepsize = stepsize // 2 num_interleaved = len(x_smoothed[next_stepsize::stepsize]) if num_interleaved == num_steps: # linearly extrapolate the last value if we have to: x_smoothed[next_stepsize::stepsize][-1] = ( x_smoothed[::stepsize][-1] + (x_smoothed[::stepsize][-1] - x_smoothed[::stepsize][-2]) / 2 ) num_interleaved = num_interleaved - 1 # linearly interpolate the remaining values: x_smoothed[next_stepsize::stepsize][:num_interleaved] = ( x_smoothed[::stepsize][:-1] + x_smoothed[::stepsize][1:] ) / 2 if stepsize == 1: break stepsize //= 2 return x_smoothed @ensure_batched(hip_translations=3) def get_smooth_root_pos(hip_translations): """Smooth root trajectory in the ground plane while preserving height. Args: hip_translations: Root translations ``[B, T, 3]``. Returns: Smoothed root translations ``[B, T, 3]`` where ``x/z`` are smoothed and ``y`` remains unchanged. """ root_translations_xz = hip_translations[..., [0, 2]] root_translations_y = hip_translations[..., [1]] batch_size, nframes = root_translations_xz.shape[:2] margins = np.full(root_translations_xz.shape[1], 0.06) root_translations_smoothed_xz = [] for batch in range(batch_size): root_translations_smoothed_xz.append( smooth_signal(root_translations_xz[batch].detach().cpu().numpy(), margins)[None] ) root_translations_smoothed_xz = torch.tensor(np.concatenate(root_translations_smoothed_xz)) root_translations = torch.cat( [ root_translations_smoothed_xz.to(root_translations_y.device), root_translations_y, ], dim=-1, )[..., [0, 2, 1]] return root_translations ================================================ FILE: kimodo/motion_rep/stats.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Feature normalization statistics (mean/std) for motion representations.""" import logging import os from typing import Optional import numpy as np import torch log = logging.getLogger(__name__) class Stats(torch.nn.Module): """Utility module for feature normalization statistics. Normalization follows: ``(data - mean) / sqrt(std**2 + eps)`` """ def __init__( self, folder: Optional[str] = None, load: bool = True, eps=1e-05, ): super().__init__() self.folder = folder self.eps = eps if folder is not None and load: self.load() def sliced(self, indices): """Return a new ``Stats`` object containing selected feature indices.""" new_stats = Stats(folder=self.folder, load=False, eps=self.eps) new_stats.register_from_tensors( self.mean[..., indices].clone(), self.std[..., indices].clone(), ) return new_stats def load(self): """Load ``mean.npy`` and ``std.npy`` from ``self.folder``.""" mean_path = os.path.join(self.folder, "mean.npy") std_path = os.path.join(self.folder, "std.npy") if not os.path.exists(mean_path) or not os.path.exists(std_path): raise FileNotFoundError( f"Missing stats files in '{self.folder}'. 
Expected:\n" f" - {mean_path}\n" f" - {std_path}\n\n" "Make sure the checkpoint/stats have been downloaded and are mounted into the container.\n" "If you're using Docker Compose, run it from the repo root so `./:/workspace` mounts the correct directory." ) mean = torch.from_numpy(np.load(mean_path)) std = torch.from_numpy(np.load(std_path)) self.register_from_tensors(mean, std) def register_from_tensors(self, mean: torch.Tensor, std: torch.Tensor): """Register mean/std tensors as non-persistent buffers.""" self.register_buffer("mean", mean, persistent=False) self.register_buffer("std", std, persistent=False) def normalize(self, data: torch.Tensor) -> torch.Tensor: """Normalize data using the stored statistics.""" mean = self.mean.to(device=data.device, dtype=data.dtype) std = self.std.to(device=data.device, dtype=data.dtype) # adjust std with eps return (data - mean) / torch.sqrt(std**2 + self.eps) def unnormalize(self, data: torch.Tensor) -> torch.Tensor: """Undo normalization using the stored statistics.""" mean = self.mean.to(device=data.device, dtype=data.dtype) std = self.std.to(device=data.device, dtype=data.dtype) # adjust std with eps return data * torch.sqrt(std**2 + self.eps) + mean def is_loaded(self): """Return whether statistics are currently available.""" return hasattr(self, "mean") def get_dim(self): """Return feature dimensionality.""" return self.mean.shape[0] def save( self, folder: Optional[str] = None, mean: Optional[torch.Tensor] = None, std: Optional[torch.Tensor] = None, ): """Save statistics to ``folder`` as ``mean.npy`` and ``std.npy``.""" if folder is None: folder = self.folder if folder is None: raise ValueError("No folder to save stats") if mean is None and std is None: try: mean = self.mean.cpu().numpy() std = self.std.cpu().numpy() except AttributeError: raise ValueError("Stats were not loaded") # don't override stats folder os.makedirs(folder, exist_ok=False) np.save(os.path.join(folder, "mean.npy"), mean) np.save(os.path.join(folder, "std.npy"), std) def __eq__(self, other): return (self.mean.cpu() == other.mean.cpu()).all() and (self.std.cpu() == other.std.cpu()).all() # should define a hash value for pytorch, as we defined __eq__ def __hash__(self): # Convert mean and std to bytes for a consistent hash value mean_hash = hash(self.mean.detach().cpu().numpy().tobytes()) std_hash = hash(self.std.detach().cpu().numpy().tobytes()) return hash((mean_hash, std_hash)) def __repr__(self): return f'Stats(folder="{self.folder}")' ================================================ FILE: kimodo/postprocess.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Post-processing utilities for motion generation output.""" from types import SimpleNamespace from typing import Dict, List, Optional, Tuple import numpy as np import torch from .constraints import ( EndEffectorConstraintSet, FullBodyConstraintSet, Root2DConstraintSet, ) from .geometry import matrix_to_quaternion, quaternion_to_matrix from .skeleton import ( G1Skeleton34, SkeletonBase, SMPLXSkeleton22, SOMASkeleton30, SOMASkeleton77, fk, ) def extract_input_motion_from_constraints( constraint_lst: List, skeleton: SkeletonBase, num_frames: int, num_joints: int, ) -> Tuple[torch.Tensor, torch.Tensor]: """Extract hip translations and local rotations from constraints for postprocessing. Args: constraint_lst: List of constraints (FullBodyConstraintSet, EndEffectorConstraintSet, etc.) 
        skeleton: Skeleton instance
        num_frames: Total number of frames in the motion
        num_joints: Number of joints

    Returns:
        Tuple of (hip_translations_input, rotations_input):
            - hip_translations_input: Hip translations, shape (T, 3)
            - rotations_input: Local joint rotations as quaternions, shape (T, J, 4)
    """
    # Initialize with zeros for all frames
    hip_translations_input = torch.zeros(num_frames, 3)
    rotations_input = torch.zeros(num_frames, num_joints, 4)
    rotations_input[..., 0] = 1.0  # Initialize as identity quaternions (w=1, x=y=z=0)

    def _match_hip_dtype(tensor: torch.Tensor) -> torch.Tensor:
        return tensor.to(device=hip_translations_input.device, dtype=hip_translations_input.dtype)

    def _match_rot_dtype(tensor: torch.Tensor) -> torch.Tensor:
        return tensor.to(device=rotations_input.device, dtype=rotations_input.dtype)

    if not constraint_lst:
        return hip_translations_input, rotations_input

    # Sort constraints to ensure FullBodyConstraintSet is processed last.
    # This ensures it gets the last say on whether hip translations need to be the exact root or the smoothed root.
    sorted_constraints = sorted(constraint_lst, key=lambda c: isinstance(c, FullBodyConstraintSet))

    for constraint in sorted_constraints:
        frame_indices = constraint.frame_indices
        if isinstance(frame_indices, torch.Tensor):
            valid_mask = frame_indices < num_frames
            if valid_mask.sum() == 0:
                continue
            frame_indices = frame_indices[valid_mask]
        else:
            valid_positions = [i for i, idx in enumerate(frame_indices) if idx < num_frames]
            if not valid_positions:
                continue
            frame_indices = [frame_indices[i] for i in valid_positions]

        # Handle Root2DConstraintSet separately - only assign smooth_root_2d at the xz dimensions
        if isinstance(constraint, Root2DConstraintSet):
            smooth_root_2d = constraint.smooth_root_2d  # (K, 2) where K = len(frame_indices)
            if isinstance(frame_indices, torch.Tensor):
                smooth_root_2d = smooth_root_2d[valid_mask]
            else:
                smooth_root_2d = smooth_root_2d[valid_positions]
            smooth_root_2d = _match_hip_dtype(smooth_root_2d)
            hip_translations_input[frame_indices, 0] = smooth_root_2d[:, 0]  # x
            hip_translations_input[frame_indices, 2] = smooth_root_2d[:, 1]  # z
            continue
        elif isinstance(constraint, FullBodyConstraintSet) or isinstance(constraint, EndEffectorConstraintSet):
            global_rots = constraint.global_joints_rots  # (K, J, 3, 3) where K = len(frame_indices)
            global_positions = constraint.global_joints_positions  # (K, J, 3)
            if isinstance(frame_indices, torch.Tensor):
                global_rots = global_rots[valid_mask]
                global_positions = global_positions[valid_mask]
                smooth_root_2d = constraint.smooth_root_2d[valid_mask]
            else:
                global_rots = global_rots[valid_positions]
                global_positions = global_positions[valid_positions]
                smooth_root_2d = constraint.smooth_root_2d[valid_positions]

            root_positions = global_positions[:, skeleton.root_idx]  # (K, 3)
            # replace xz with smooth_root_2d values for EE constraints that do not include Hips,
            # since the hips themselves are not actually constrained in the model conditioning
            if isinstance(constraint, EndEffectorConstraintSet) and "Hips" not in constraint.joint_names:
                root_positions[:, 0] = smooth_root_2d[:, 0]  # x
                root_positions[:, 2] = smooth_root_2d[:, 1]  # z

            local_rot_mats = skeleton.global_rots_to_local_rots(global_rots)  # (K, J, 3, 3)
            local_rot_quats = matrix_to_quaternion(local_rot_mats)  # (K, J, 4)
            hip_translations_input[frame_indices] = _match_hip_dtype(root_positions)
            rotations_input[frame_indices] = _match_rot_dtype(local_rot_quats)
        else:
            raise NotImplementedError(f"Constraint {constraint.name} is not supported")

    return hip_translations_input,
rotations_input def create_working_rig_from_skeleton( skeleton: SkeletonBase, above_ground_offset: float = 0.007 ) -> List[SimpleNamespace]: """Create the working rig as a list of SimpleNamespace objects from skeleton. Args: skeleton: SkeletonBase instance with bone_order_names, neutral_joints, joint_parents above_ground_offset: Additional offset to position the rig slightly above ground Returns: List of SimpleNamespace objects representing the working rig """ working_rig_joints = [] joint_names = skeleton.bone_order_names neutral_positions = skeleton.neutral_joints.cpu().numpy() parent_indices = skeleton.joint_parents.cpu().numpy() if isinstance(skeleton, (G1Skeleton34, SMPLXSkeleton22)): retarget_map = { skeleton.bone_order_names[skeleton.root_idx]: "Hips", skeleton.left_hand_joint_names[0]: "LeftHand", skeleton.right_hand_joint_names[0]: "RightHand", skeleton.left_foot_joint_names[0]: "LeftFoot", skeleton.right_foot_joint_names[0]: "RightFoot", } else: # works for SOMA retarget_map = { "Hips": "Hips", "Head": "Head", "LeftHand": "LeftHand", "RightHand": "RightHand", "LeftFoot": "LeftFoot", "RightFoot": "RightFoot", } for i, joint_name in enumerate(joint_names): parent_name = None if parent_indices[i] == -1 else joint_names[parent_indices[i]] # Calculate local translation relative to parent if parent_indices[i] == -1: # Move the rig so that the lowest point (toe) is at ground level (y=0), # plus a small offset to position the rig slightly above ground toe_height = neutral_positions[:, 1].min() # lowest y-coordinate (toe) local_translation = ( neutral_positions[i] + np.array([0.0, -toe_height + above_ground_offset, 0.0]) ).tolist() else: parent_idx = parent_indices[i] parent_position = neutral_positions[parent_idx] joint_position = neutral_positions[i] local_translation = (joint_position - parent_position).tolist() # Default rotation (identity quaternion: x=0, y=0, z=0, w=1) default_rotation = [0.0, 0.0, 0.0, 1.0] joint_info = SimpleNamespace( name=joint_name, parent=parent_name, t_pose_rotation=default_rotation, t_pose_translation=local_translation, retarget_tag=retarget_map.get(joint_name), ) working_rig_joints.append(joint_info) return working_rig_joints def post_process_motion( local_rot_mats: torch.Tensor, root_positions: torch.Tensor, contacts: torch.Tensor, skeleton: SkeletonBase, constraint_lst: Optional[List] = None, contact_threshold: float = 0.5, root_margin: float = 0.04, ) -> Dict[str, torch.Tensor]: """Post-process generated motion to reduce foot skating and improve quality. Args: local_rot_mats: Local joint rotation matrices, shape (B, T, J, 3, 3) root_positions: Root joint positions, shape (B, T, 3) contacts: Foot contact labels, shape (B, T, num_contacts) skeleton: Skeleton instance constraint_lst: Optional list of constraints (or list of lists of constraints for batched inference)(FullBodyConstraintSet, etc.) 
contact_threshold: Threshold for foot contact detection root_margin: Margin for root position correction Returns: Dictionary with corrected motion data: - local_rot_mats: Corrected local rotation matrices (B, T, J, 3, 3) - root_positions: Corrected root positions (B, T, 3) - posed_joints: Corrected global joint positions (B, T, J, 3) - global_rot_mats: Corrected global rotation matrices (B, T, J, 3, 3) """ # Ensure batch dimension assert local_rot_mats.dim() == 5, "local_rot_mats should be 5D, make sure to include the batch dimension" batch_size, num_frames, num_joints = local_rot_mats.shape[:3] def _build_constraint_masks_dict(constraints: List) -> Dict[str, torch.Tensor]: out = { key: torch.zeros(num_frames, dtype=torch.float32) for key in [ "FullBody", "LeftFoot", "RightFoot", "LeftHand", "RightHand", "Root", ] } for constraint in constraints: frame_indices = constraint.frame_indices if isinstance(frame_indices, torch.Tensor): frame_indices = frame_indices[frame_indices < num_frames] if frame_indices.numel() == 0: continue else: frame_indices = [idx for idx in frame_indices if idx < num_frames] if not frame_indices: continue if constraint.name == "fullbody": out["FullBody"][frame_indices] = 1.0 elif constraint.name == "left-foot": out["LeftFoot"][frame_indices] = 1.0 elif constraint.name == "right-foot": out["RightFoot"][frame_indices] = 1.0 elif constraint.name == "left-hand": out["LeftHand"][frame_indices] = 1.0 elif constraint.name == "right-hand": out["RightHand"][frame_indices] = 1.0 elif constraint.name == "root2d": out["Root"][frame_indices] = 1.0 return out # Create constraint masks from constraint_lst (one dict per batch item when batched) batched_constraints = bool(constraint_lst) and isinstance(constraint_lst[0], list) if batched_constraints: constraint_masks_dict_lst = [_build_constraint_masks_dict(constraint_lst[b]) for b in range(batch_size)] else: constraint_masks_dict = ( _build_constraint_masks_dict(constraint_lst) if constraint_lst else { key: torch.zeros(num_frames, dtype=torch.float32) for key in [ "FullBody", "LeftFoot", "RightFoot", "LeftHand", "RightHand", "Root", ] } ) # Create working rig above_ground_offset = 0.02 if isinstance(skeleton, (SOMASkeleton30, SOMASkeleton77)) else 0.007 # larger offset for SOMA since model tends to generate lower to the ground working_rig = create_working_rig_from_skeleton(skeleton, above_ground_offset=above_ground_offset) has_double_ankle_joints = isinstance(skeleton, G1Skeleton34) # Prepare input tensors. The generated motion will be modified in place. Clone first. 
neutral_joints_pelvis_offset = skeleton.neutral_joints[0].cpu().clone() hip_translations_corrected = root_positions.cpu().clone() rotations_corrected = matrix_to_quaternion(local_rot_mats).cpu().clone() # (B, T, J, 4) contacts = contacts.cpu() # Extract input motion (target keyframes) from constraints for each batch # For constrained keyframes, use the original motion from constraints # For non-constrained frames, zeros are used hip_translations_input = torch.zeros(batch_size, num_frames, 3) rotations_input = torch.zeros(batch_size, num_frames, num_joints, 4) rotations_input[..., 0] = 1.0 # Initialize as identity quaternions (w=1, x=y=z=0) if constraint_lst: for b in range(batch_size): # Get constraints for this batch item (if batched) or use the same list constraints_lst_el = ( constraint_lst[b] if isinstance( constraint_lst[0], list ) # when the constraint_list is in batch format, each item in a list is a constraintlist for one sample else constraint_lst # single constraint list shared for all samples in the batch ) hip_translations_input[b], rotations_input[b] = extract_input_motion_from_constraints( constraints_lst_el, skeleton, num_frames, num_joints, ) # Call the motion correction for each batch (optional package) try: from motion_correction import motion_postprocess except ImportError as e: raise RuntimeError( "Motion correction is required for this postprocessing path but the " "motion_correction package is not installed. Install with: pip install -e ." ) from e for b in range(batch_size): masks_b = constraint_masks_dict_lst[b] if batched_constraints else constraint_masks_dict motion_postprocess.correct_motion( hip_translations_corrected[b : b + 1], rotations_corrected[b : b + 1], contacts[b : b + 1], hip_translations_input[b : b + 1], rotations_input[b : b + 1], masks_b, contact_threshold, root_margin, working_rig, has_double_ankle_joints, ) local_rot_mats_corrected = quaternion_to_matrix(rotations_corrected) # Compute posed joints using FK device = local_rot_mats.device global_rot_mats, posed_joints, _ = fk( local_rot_mats_corrected.to(device), hip_translations_corrected.to(device), skeleton, ) result = { "local_rot_mats": local_rot_mats_corrected.to(device), "root_positions": hip_translations_corrected.to(device), "posed_joints": posed_joints, "global_rot_mats": global_rot_mats, } return result ================================================ FILE: kimodo/sanitize.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Text prompt sanitization for motion generation (whitespace, punctuation, capitalization).""" def sanitize_text(text: str, paragraph: bool = True) -> str: """Sanitize a text prompt: strip, collapse spaces, capitalize, trim non-alphanumeric, add/fix final punctuation. Args: text: Input text prompt. paragraph: If True, capitalize after each sentence break and normalize spacing between sentences. Returns: Sanitized text. 
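
    Example (illustrative, traced through the rules implemented below):
        >>> sanitize_text("  someone   go forward ")
        'Someone go forward.'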
""" # remove any trailing or leading whitespace text = text.strip() # https://stackoverflow.com/a/1546251 # replace duplicate spaces by one space text = " ".join(text.split()) if text == "": return text # removing leading non alpha numeric characters for i, c in enumerate(text): if not str.isalnum(c): continue break text = text[i:] # Capitalize text = text.capitalize() final_punctuations = ".!?\"])'" # removing trailing non alpha numeric characters # expect final punctuations for i, c in reversed(list(enumerate(text))): if not str.isalnum(c) and c not in final_punctuations: continue break text = text[: i + 1] # Adding period at the end if needed if text[-1] not in ".!?": text = text + "." if paragraph: # fix end of sentences if several sentences for sentence_break in ".!?": subtexts = text.split(sentence_break) text = f"{sentence_break} ".join( # put back a space after the break [ y[0].capitalize() + y[1:] # only capitalize the first character if y else y # y is empty at the end for x in subtexts for y in [x.strip()] # remove extra spaces ] ).strip() # remove extra space at the end return text def sanitize_texts(texts: list[str]) -> list[str]: """Sanitize each text prompt in the list (see sanitize_text). Args: texts: List of input text prompts. Returns: List of sanitized texts. """ return [sanitize_text(text) for text in texts] if __name__ == "__main__": texts = [ " A person is walking.", "someone go forward", "jump", "jumping!", "jumping)", "-go", "blocasdji -----", "", ] print("Old texts") print("\n".join(texts)) print() new_texts = sanitize_texts(texts) print("Sanitized texts") print("\n".join(new_texts)) ================================================ FILE: kimodo/scripts/__init__.py ================================================ ================================================ FILE: kimodo/scripts/docker-entrypoint.sh ================================================ #!/usr/bin/env bash set -euo pipefail HOST_UID="${HOST_UID:-}" HOST_GID="${HOST_GID:-}" HOST_USER="${HOST_USER:-user}" if [[ -z "${HOST_UID}" || -z "${HOST_GID}" ]]; then if [[ -d /workspace ]]; then HOST_UID="$(stat -c %u /workspace)" HOST_GID="$(stat -c %g /workspace)" else HOST_UID="${HOST_UID:-1000}" HOST_GID="${HOST_GID:-1000}" fi fi if ! getent group "${HOST_GID}" >/dev/null 2>&1; then groupadd -g "${HOST_GID}" "${HOST_USER}" fi if ! getent passwd "${HOST_UID}" >/dev/null 2>&1; then useradd -m -u "${HOST_UID}" -g "${HOST_GID}" -s /bin/bash "${HOST_USER}" fi exec gosu "${HOST_UID}:${HOST_GID}" "$@" ================================================ FILE: kimodo/scripts/generate.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 import argparse import os import shutil from typing import Any, Dict, Optional import torch from kimodo import DEFAULT_MODEL, load_model from kimodo.constraints import load_constraints_lst from kimodo.exports.motion_io import save_kimodo_npz from kimodo.meta import load_prompts_from_meta from kimodo.model.cfg import CFG_TYPES from kimodo.model.registry import get_model_info from kimodo.tools import load_json, save_json, seed_everything def parse_args(): parser = argparse.ArgumentParser(description="Cmd line API for generation motions with kimodo") parser.add_argument( "prompt", nargs="?", type=str, default=None, help="Text prompt describing the motion to generate, or several prompts separated by periods.", ) parser.add_argument( "--model", type=str, default=DEFAULT_MODEL, help="Name of the model (e.g. Kimodo-SOMA-RP-v1, etc).", ) parser.add_argument( "--duration", type=str, default="5.0", help="Duration in seconds (default: 5.0). Separate by spaces in a string for different durations per prompts", ) parser.add_argument( "--num_samples", type=int, default=1, help="Number of samples to generate (default: 1)", ) parser.add_argument( "--diffusion_steps", type=int, default=100, help="Number of diffusion steps (default: 100)", ) parser.add_argument( "--num_transition_frames", type=int, default=5, help="Number of frames to help transitioning (default: 5)", ) parser.add_argument( "--constraints", type=str, default=None, help="Saved constraint list", ) parser.add_argument( "--output", type=str, default="output", help="Output stem name: with one sample writes a single file per format (e.g. test.npz, test.csv); with multiple samples creates a folder and writes test_00.npz, test_01.npz, ... inside it. Used for NPZ, AMASS NPZ, CSV, and BVH.", ) parser.add_argument( "--save_example_dir", action="store_true", help=( "Save demo-compatible example directories (each contains motion.npz, constraints.json, meta.json). " "With one sample, writes _example/. With multiple samples, writes " "_examples/_example_00/, _example_01/, ..." ), ) parser.add_argument( "--bvh", action="store_true", help="Also export BVH (SOMA models only); uses the same stem as --output.", ) parser.add_argument( "--bvh_standard_tpose", action="store_true", help="If exporting BVH, export with the rest pose being the standard T-pose rather than the rest pose consistent with the BONES-SEED dataset.", ) parser.add_argument( "--no-postprocess", action="store_true", help="Don't apply motion post-processing to reduce foot skating (ignored for G1)", ) parser.add_argument( "--seed", type=int, default=None, help="Seed for reproducible results", ) parser.add_argument( "--input_folder", type=str, default=None, help="Folder containing meta.json and optional constraints.json. If set, generation settings are loaded from meta.json.", ) parser.add_argument( "--cfg_type", type=str, default=argparse.SUPPRESS, choices=CFG_TYPES, help=( "Classifier-free guidance mode: nocfg (no CFG), regular (single scale on cond vs uncond), " "or separated (custom: separate text and constraint scales). " "Use with --cfg_weight as required by the mode." ), ) parser.add_argument( "--cfg_weight", type=float, nargs="*", default=argparse.SUPPRESS, help=( "CFG scale(s): one float for regular, or two floats [text_weight, constraint_weight] for separated. " "Omit with --cfg_type nocfg. If omitted, two floats alone imply separated; one float alone implies regular." 
), ) return parser.parse_args() def get_texts_and_num_frames_from_prompt(prompt: str, duration: str, fps: float): # Get the texts texts = [text.strip() for text in prompt.split(".")] texts = [text + "." for text in texts if text] nb_prompts = len(texts) # Get the durations if " " not in duration: duration_sec = float(duration) # same for all the prompts num_frames = [int(duration_sec * fps)] * nb_prompts else: durations = duration.split(" ") assert len(durations) == len(texts), "The number of durations should match the number of prompts" num_frames = [int(float(duration.strip()) * fps) for duration in durations] assert len(num_frames) == nb_prompts, "The number of durations should be 1 or match the number of texts" return texts, num_frames def _single_file_path(path: str, ext: str) -> str: """Return path for a single output file (no folder). Adds ext if missing; creates parent dirs if any. """ if not path.endswith(ext): path = path.rstrip(os.sep) + ext parent = os.path.dirname(path) if parent: os.makedirs(parent, exist_ok=True) return path def _output_dir_and_path(path: str, default_base: str, ext: str): """Create output folder from path and return (dir_path, path_for_file_with_suffix, base_name). If path has an extension, folder name is the path stem; else the path is the folder name. base_name is the folder basename for _00, _01, ... when n_samples > 1. """ folder = os.path.splitext(path)[0] if os.path.splitext(path)[1] else path os.makedirs(folder, exist_ok=True) base_name = os.path.basename(folder.rstrip(os.sep)) return folder, os.path.join(folder, default_base + ext), base_name def resolve_cfg_kwargs(args: argparse.Namespace, meta: Optional[Dict[str, Any]]) -> Dict[str, Any]: """Resolve cfg_type / cfg_weight for model(...). Precedence: explicit CLI (--cfg_type / --cfg_weight) overrides meta.json ``cfg``; if neither applies, returns {} so the model uses its own defaults. 
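
    Examples (illustrative; mirrors the precedence rules implemented below):
        --cfg_type regular --cfg_weight 2.5  ->  {"cfg_type": "regular", "cfg_weight": 2.5}
        --cfg_weight 2.0 3.0                 ->  {"cfg_type": "separated", "cfg_weight": [2.0, 3.0]}
        --cfg_type nocfg                     ->  {"cfg_type": "nocfg"}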
""" ns = vars(args) has_type = "cfg_type" in ns has_wflag = "cfg_weight" in ns cli_type = ns.get("cfg_type") cli_w = ns.get("cfg_weight") if has_wflag: if cli_w is None or len(cli_w) == 0: raise ValueError("--cfg_weight requires one float (regular) or two floats (separated).") if has_type and cli_type == "nocfg": if has_wflag: raise ValueError("--cfg_weight is not used with --cfg_type nocfg.") return {"cfg_type": "nocfg"} if has_type or has_wflag: if has_type: eff_type = cli_type if has_wflag: if eff_type == "regular" and len(cli_w) != 1: raise ValueError("--cfg_type regular requires exactly one --cfg_weight value.") if eff_type == "separated" and len(cli_w) != 2: raise ValueError("--cfg_type separated requires exactly two --cfg_weight values.") else: if eff_type == "regular": raise ValueError("--cfg_type regular requires --cfg_weight with one float.") if eff_type == "separated": raise ValueError("--cfg_type separated requires --cfg_weight with two floats.") else: if len(cli_w) == 1: eff_type = "regular" elif len(cli_w) == 2: eff_type = "separated" else: raise ValueError("--cfg_weight expects 1 float (regular) or 2 floats (separated).") if eff_type == "regular": return {"cfg_type": "regular", "cfg_weight": float(cli_w[0])} return {"cfg_type": "separated", "cfg_weight": [float(cli_w[0]), float(cli_w[1])]} if meta and isinstance(meta.get("cfg"), dict): cfg = meta["cfg"] enabled = cfg.get("enabled", True) if not enabled: return {"cfg_type": "nocfg"} return { "cfg_type": "separated", "cfg_weight": [ float(cfg.get("text_weight", 2.0)), float(cfg.get("constraint_weight", 2.0)), ], } return {} def get_generation_inputs(args, fps: float): """Get texts/num_frames and parameter overrides from either CLI or input_folder.""" if args.input_folder is None: if not args.prompt: raise ValueError("Either provide 'prompt' or '--input_folder'.") texts, num_frames = get_texts_and_num_frames_from_prompt(args.prompt, args.duration, fps) return { "texts": texts, "num_frames": num_frames, "num_samples": args.num_samples, "diffusion_steps": args.diffusion_steps, "seed": args.seed, "constraints_path": args.constraints, "meta": None, } meta_path = os.path.join(args.input_folder, "meta.json") meta = load_json(meta_path) texts, durations_sec = load_prompts_from_meta(meta_path) num_frames = [int(float(duration) * fps) for duration in durations_sec] constraints_path = args.constraints default_constraints_path = os.path.join(args.input_folder, "constraints.json") if constraints_path is None and os.path.exists(default_constraints_path): constraints_path = default_constraints_path return { "texts": texts, "num_frames": num_frames, "num_samples": meta.get("num_samples", args.num_samples), "diffusion_steps": meta.get("diffusion_steps", args.diffusion_steps), "seed": meta.get("seed", args.seed), "constraints_path": constraints_path, "meta": meta, } def main(): device = "cuda:0" if torch.cuda.is_available() else "cpu" print(f"Using device: {device}") args = parse_args() # Load model (resolution of name done inside load_model) model, resolved_model = load_model( args.model, device=device, default_family="Kimodo", return_resolved_name=True, ) info = get_model_info(resolved_model) display = info.display_name if info else resolved_model print(f"Loaded model: {display} ({resolved_model})") # Get generation inputs generation_inputs = get_generation_inputs(args, model.fps) texts = generation_inputs["texts"] num_frames = generation_inputs["num_frames"] print("Will generate motions with the following prompts") for text, num_frame in 
zip(texts, num_frames): print(f" '{text}' with {num_frame} frames") # Load constraints constraints_path = generation_inputs["constraints_path"] if constraints_path: constraint_lst = load_constraints_lst(constraints_path, model.skeleton) else: constraint_lst = [] if constraint_lst: print(f"Using {len(constraint_lst)} set of constraints") for constraint in constraint_lst: print(f" {constraint}") if generation_inputs["seed"] is not None: seed_everything(generation_inputs["seed"]) cfg_kwargs = resolve_cfg_kwargs(args, generation_inputs.get("meta")) if cfg_kwargs: ct = cfg_kwargs.get("cfg_type") cw = cfg_kwargs.get("cfg_weight") if cw is not None: print(f"Using CFG: cfg_type={ct!r}, cfg_weight={cw!r}") else: print(f"Using CFG: cfg_type={ct!r}") # G1: postprocessing is disabled (does not work well for this model). use_postprocess = False if "g1" in resolved_model else (not args.no_postprocess) output = model( texts, num_frames, constraint_lst=constraint_lst, num_denoising_steps=generation_inputs["diffusion_steps"], num_samples=generation_inputs["num_samples"], multi_prompt=True, num_transition_frames=args.num_transition_frames, post_processing=use_postprocess, return_numpy=True, **cfg_kwargs, ) n_samples = int(output["posed_joints"].shape[0]) # Parse the output stem once; all formats (NPZ, AMASS NPZ, CSV, BVH) use this base name. output_base = args.output # Save the NPZ output if n_samples == 1: npz_path = _single_file_path(output_base, ".npz") print(f"Saving the npz output to {npz_path}") single = { k: (v[0] if hasattr(v, "shape") and len(v.shape) > 0 and v.shape[0] == n_samples else v) for k, v in output.items() } save_kimodo_npz(npz_path, single) else: out_dir, _, base_name = _output_dir_and_path(output_base, "motion", ".npz") print(f"Saving the npz output to {out_dir}/ ({base_name}_00.npz ...)") for i in range(n_samples): single = { k: (v[i] if hasattr(v, "shape") and len(v.shape) > 0 and v.shape[0] == n_samples else v) for k, v in output.items() } save_kimodo_npz(os.path.join(out_dir, f"{base_name}_{i:02d}.npz"), single) # Save the AMASS NPZ output if resolved_model == "kimodo-smplx-rp": from kimodo.exports.smplx import AMASSConverter converter = AMASSConverter(skeleton=model.skeleton, fps=model.fps) if n_samples == 1: # Use distinct name so AMASS NPZ does not overwrite the main NPZ amass_single_path = _single_file_path(output_base + "_amass", ".npz") print(f"Saving the amass output to {amass_single_path}") converter.convert_save_npz(output, amass_single_path) else: out_dir, _, base_name = _output_dir_and_path(output_base, "amass", ".npz") print(f"Saving the amass output to {out_dir}/ (amass_00.npz ...)") converter.convert_save_npz(output, os.path.join(out_dir, "amass.npz")) # Save the CSV output if resolved_model == "kimodo-g1-rp": from kimodo.exports.mujoco import MujocoQposConverter converter = MujocoQposConverter(model.skeleton) qpos = converter.dict_to_qpos(output, device) if n_samples == 1: csv_path = _single_file_path(output_base, ".csv") print(f"Saving the csv output to {csv_path}") converter.save_csv(qpos, csv_path) else: out_dir, _, base_name = _output_dir_and_path(output_base, "qpos", ".csv") print(f"Saving the csv output to {out_dir}/ ({base_name}_00.csv ...)") converter.save_csv(qpos, os.path.join(out_dir, base_name + ".csv")) # Save the BVH output if args.bvh: skeleton = model.skeleton if "somaskel" not in skeleton.name: print("BVH export is only supported for SOMA skeletons. 
Skipping --bvh.") else: from kimodo.exports.bvh import save_motion_bvh from kimodo.skeleton import SOMASkeleton30, global_rots_to_local_rots if isinstance(skeleton, SOMASkeleton30): # Motion has already been converted to somaskel77 within the model for output skeleton = skeleton.somaskel77.to(device) if n_samples == 1: bvh_path = _single_file_path(output_base, ".bvh") print(f"Saving the BVH output to {bvh_path}") joints_pos = torch.from_numpy(output["posed_joints"][0]).to(device) joints_rot = torch.from_numpy(output["global_rot_mats"][0]).to(device) local_rot_mats = global_rots_to_local_rots(joints_rot, skeleton) root_positions = joints_pos[:, skeleton.root_idx, :] save_motion_bvh( bvh_path, local_rot_mats, root_positions, skeleton=skeleton, fps=model.fps, standard_tpose=args.bvh_standard_tpose, ) else: out_dir, _, base_name = _output_dir_and_path(output_base, "motion", ".bvh") print(f"Saving the BVH output to {out_dir}/ ({base_name}_00.bvh ...)") for i in range(n_samples): joints_pos = torch.from_numpy(output["posed_joints"][i]).to(device) joints_rot = torch.from_numpy(output["global_rot_mats"][i]).to(device) local_rot_mats = global_rots_to_local_rots(joints_rot, skeleton) root_positions = joints_pos[:, skeleton.root_idx, :] save_motion_bvh( os.path.join(out_dir, f"{base_name}_{i:02d}.bvh"), local_rot_mats, root_positions, skeleton=skeleton, fps=model.fps, standard_tpose=args.bvh_standard_tpose, ) # Save the example directory if args.save_example_dir: output_stem = os.path.splitext(output_base)[0].rstrip(os.sep) base_name = os.path.basename(output_stem) if n_samples == 1: parent_dir = None example_dirs = [output_stem + "_example"] else: parent_dir = output_stem + "_examples" if os.path.exists(parent_dir): raise FileExistsError(f"Example directory already exists: {parent_dir}") os.makedirs(parent_dir) example_dirs = [ os.path.join(parent_dir, f"{base_name}_example_{i:02d}") for i in range(n_samples) ] durations_sec = [nf / model.fps for nf in num_frames] if len(texts) == 1: meta_info: dict = {"text": texts[0], "duration": durations_sec[0]} else: meta_info = {"texts": texts, "durations": durations_sec} meta_info["num_samples"] = generation_inputs["num_samples"] if generation_inputs["seed"] is not None: meta_info["seed"] = generation_inputs["seed"] meta_info["diffusion_steps"] = generation_inputs["diffusion_steps"] if cfg_kwargs: cfg_type = cfg_kwargs.get("cfg_type", "nocfg") cfg_weight = cfg_kwargs.get("cfg_weight") if cfg_type == "nocfg": meta_info["cfg"] = {"enabled": False} elif cfg_type == "separated" and isinstance(cfg_weight, list) and len(cfg_weight) == 2: meta_info["cfg"] = { "enabled": True, "text_weight": cfg_weight[0], "constraint_weight": cfg_weight[1], } elif cfg_type == "regular" and cfg_weight is not None: meta_info["cfg"] = { "enabled": True, "text_weight": float(cfg_weight), "constraint_weight": float(cfg_weight), } for i, example_dir in enumerate(example_dirs): if os.path.exists(example_dir): raise FileExistsError(f"Example directory already exists: {example_dir}") os.makedirs(example_dir) sample = { k: (v[i] if hasattr(v, "shape") and len(v.shape) > 0 and v.shape[0] == n_samples else v) for k, v in output.items() } save_kimodo_npz(os.path.join(example_dir, "motion.npz"), sample) if constraints_path: shutil.copy2(constraints_path, os.path.join(example_dir, "constraints.json")) save_json(os.path.join(example_dir, "meta.json"), meta_info) if parent_dir is None: print(f"Saved demo example to {example_dirs[0]}") else: print(f"Saved {n_samples} demo examples to {parent_dir}/") 
if __name__ == "__main__": main() ================================================ FILE: kimodo/scripts/gradio_theme.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import gradio as gr def get_gradio_theme(remove_gradio_footer=False): theme = gr.themes.Base( primary_hue="blue", text_size=gr.themes.Size(lg="16px", md="14px", sm="12px", xl="22px", xs="10px", xxl="35px", xxs="9px"), font=[ gr.themes.GoogleFont("Source Sans Pro"), "BlinkMacSystemFont", "Segoe UI", "Roboto", ], ).set( body_text_color="*neutral_900", body_text_color_subdued="*neutral_500", body_text_color_subdued_dark="*neutral_500", ) css = """ @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700;900&display=swap'); /* Base text */ body, .gradio-container { font-family: 'Source Sans Pro', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen-Sans, Ubuntu, Cantarell, 'Helvetica Neue', sans-serif !important; font-size: 16px !important; } h1 { // font-family: 'Source Sans Pro', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif !important; font-weight: 700 !important; font-size: 2.75rem !important; // margin: 0px; padding: 1.5rem 0px 0px 0px; // line-height: 1.2; } h2 { // font-family: 'Source Sans Pro', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif !important; font-weight: 600 !important; font-size: 1.5rem !important; } """ if remove_gradio_footer: css += """ footer { display: none !important; } """ return theme, css ================================================ FILE: kimodo/scripts/lock_requirements.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Regenerate `docker_requirements.txt` from `docker_requirements.in` using `uv`, targeting the Docker image runtime, and filter out `torch` + CUDA wheels so Docker doesn't try to reinstall PyTorch. Usage: python3 kimodo/scripts/lock_requirements.py Optional args: --python-version 3.10 --python-platform x86_64-manylinux2014 --in docker_requirements.in --out docker_requirements.txt """ import argparse import shutil import subprocess from pathlib import Path from typing import Iterable DEFAULT_PYTHON_VERSION = "3.10" DEFAULT_PYTHON_PLATFORM = "x86_64-manylinux2014" # Packages to omit from the lockfile because the Docker base image already provides torch+CUDA. OMIT_NAMES = {"torch", "triton", "networkx", "sympy", "mpmath"} OMIT_PREFIXES = ("nvidia-",) def _run(cmd: list[str]) -> None: print("+", " ".join(cmd)) subprocess.run(cmd, check=True) def _ensure_uv() -> None: if shutil.which("uv") is None: raise SystemExit( "ERROR: `uv` is not installed or not on PATH.\n" "Install it (one of):\n" " - pipx install uv\n" " - python -m pip install --user uv\n" "Then rerun this script." ) def _parse_req_name(line: str) -> str: # uv emits `name==version` lines. s = line.strip() if "==" in s: return s.split("==", 1)[0].strip() # Fallback: treat the whole token before space as name. 
return s.split()[0].strip() def _iter_blocks(lines: list[str]) -> Iterable[list[str]]: """Split a docker_requirements.txt into blocks: [top-level req line + indented comments].""" i = 0 n = len(lines) while i < n: line = lines[i] # Header/comments/blank if line.startswith("#") or line.strip() == "": yield [line] i += 1 continue # Top-level requirement line if not line.startswith(" "): block = [line] i += 1 while i < n and (lines[i].startswith(" ") or lines[i].strip() == "" or lines[i].startswith("#")): # Stop if we hit another top-level requirement line if not lines[i].startswith(" ") and not lines[i].startswith("#") and lines[i].strip() != "": break block.append(lines[i]) i += 1 yield block continue # Indented line without a requirement header (shouldn't happen, but keep) yield [line] i += 1 def _should_omit(req_line: str) -> bool: name = _parse_req_name(req_line) if name in OMIT_NAMES: return True for pfx in OMIT_PREFIXES: if name.startswith(pfx): return True return False def filter_lockfile(path: Path) -> None: lines = path.read_text(encoding="utf-8").splitlines(True) out: list[str] = [] inserted_note = False for block in _iter_blocks(lines): first = block[0] # After the uv header lines, insert a short note once. if (not inserted_note) and first.startswith("# This file was autogenerated by uv"): out.extend(block) out.append( "# NOTE: `torch` (and its CUDA wheels) are intentionally omitted from this lockfile.\n" "# The Docker base image (nvcr.io/nvidia/pytorch) already provides a tested PyTorch build.\n" "#\n" ) inserted_note = True continue if first.startswith("#") or first.strip() == "": out.extend(block) continue if _should_omit(first): continue out.extend(block) path.write_text("".join(out), encoding="utf-8") def main() -> None: ap = argparse.ArgumentParser() ap.add_argument("--in", dest="in_file", default="docker_requirements.in") ap.add_argument("--out", dest="out_file", default="docker_requirements.txt") ap.add_argument("--python-version", default=DEFAULT_PYTHON_VERSION) ap.add_argument("--python-platform", default=DEFAULT_PYTHON_PLATFORM) args = ap.parse_args() _ensure_uv() in_path = Path(args.in_file) out_path = Path(args.out_file) if not in_path.exists(): raise SystemExit(f"ERROR: missing {in_path}") _run( [ "uv", "pip", "compile", "-U", str(in_path), "-o", str(out_path), "--python-version", args.python_version, "--python-platform", args.python_platform, ] ) filter_lockfile(out_path) print(f"OK: wrote {out_path} (filtered torch/CUDA wheels)") if __name__ == "__main__": main() ================================================ FILE: kimodo/scripts/motion_convert.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """CLI entry-point for motion format conversion. Library conversion logic lives in :mod:`kimodo.exports.motion_convert_lib`. Format detection utilities live in :mod:`kimodo.exports.motion_formats`. 
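
Example (illustrative):
    python -m kimodo.scripts.motion_convert motion.bvh motion.npz --from soma-bvh --to kimodo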
""" from __future__ import annotations import argparse import sys from kimodo.exports.motion_convert_lib import convert_motion_files def run_convert( input_path: str, output_path: str, from_fmt: str | None, to_fmt: str | None, source_fps: float | None, z_up: bool, mujoco_rest_zero: bool, bvh_standard_tpose: bool = False, ) -> None: """Thin wrapper kept for backward compatibility; delegates to :func:`convert_motion_files`.""" convert_motion_files( input_path, output_path, from_fmt=from_fmt, to_fmt=to_fmt, source_fps=source_fps, z_up=z_up, mujoco_rest_zero=mujoco_rest_zero, bvh_standard_tpose=bvh_standard_tpose, ) def build_argparser() -> argparse.ArgumentParser: p = argparse.ArgumentParser( description="Convert Kimodo NPZ, AMASS NPZ, SOMA BVH, and G1 MuJoCo CSV.", ) p.add_argument("input", help="Input file path") p.add_argument("output", help="Output file path") p.add_argument( "--from", dest="from_fmt", choices=("amass", "kimodo", "soma-bvh", "g1-csv"), default=None, help="Input format (default: infer from file contents/extension)", ) p.add_argument( "--to", dest="to_fmt", choices=("kimodo", "amass", "soma-bvh", "g1-csv"), default=None, help="Output format (default: infer from output extension)", ) p.add_argument( "--source-fps", "--fps", dest="source_fps", type=float, default=None, help=( "Source motion frame rate in Hz (default: auto-detected from " "BVH Frame Time / AMASS mocap_frame_rate, or 30 Hz). " "Kimodo NPZ output is always resampled to 30 Hz." ), ) p.add_argument( "--no-z-up", action="store_true", help="For AMASS paths: disable Z-up transform (treat trans/orient as already Kimodo Y-up).", ) p.add_argument( "--mujoco-rest-zero", action="store_true", default=False, help="For G1 CSV: joint angles relative to MuJoCo rest (must match export).", ) p.add_argument( "--bvh_standard_tpose", action="store_true", default=False, help="If input or output is BVH: the BVH file uses the standard T-pose as its rest pose instead of the BONES-SEED rest pose.", ) return p def main(argv: list[str] | None = None) -> int: args = build_argparser().parse_args(argv) try: convert_motion_files( args.input, args.output, from_fmt=args.from_fmt, to_fmt=args.to_fmt, source_fps=args.source_fps, z_up=not args.no_z_up, mujoco_rest_zero=args.mujoco_rest_zero, bvh_standard_tpose=args.bvh_standard_tpose, ) except Exception as e: print(f"Error: {e}", file=sys.stderr) return 1 return 0 if __name__ == "__main__": raise SystemExit(main()) ================================================ FILE: kimodo/scripts/mujoco_load.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import time import mujoco import mujoco.viewer import numpy as np from kimodo.assets import skeleton_asset_path qpos = np.loadtxt("motion.csv", delimiter=",") model = mujoco.MjModel.from_xml_path(str(skeleton_asset_path("g1skel34", "xml", "g1.xml"))) data = mujoco.MjData(model) fps = 30 # adjust to your intended playback rate with mujoco.viewer.launch_passive(model, data) as viewer: # loop the motion while viewer.is_running(): for frame in qpos: data.qpos[:] = frame mujoco.mj_forward(model, data) viewer.sync() time.sleep(1.0 / fps) ================================================ FILE: kimodo/scripts/run_text_encoder_server.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0

import argparse
import os

import gradio as gr
import numpy as np

from kimodo.model import resolve_target

from .gradio_theme import get_gradio_theme

os.environ["HF_ENABLE_PARALLEL_LOADING"] = "YES"

DEFAULT_TEXT = "A person walks and falls to the ground."
DEFAULT_SERVER_NAME = "0.0.0.0"
DEFAULT_SERVER_PORT = 9550
DEFAULT_TMP_FOLDER = "/tmp/text_encoder/"
DEFAULT_TEXT_ENCODER = "llm2vec"

TEXT_ENCODER_PRESETS = {
    "llm2vec": {
        "target": "kimodo.model.LLM2VecEncoder",
        "kwargs": {
            "base_model_name_or_path": "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp",
            "peft_model_name_or_path": "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised",
            "dtype": "bfloat16",
            "llm_dim": 4096,
            "device": "auto",
        },
        "display_name": "LLM2Vec",
    }
}


class DemoWrapper:
    def __init__(self, text_encoder, tmp_folder):
        self.text_encoder = text_encoder
        self.tmp_folder = tmp_folder

    def __call__(self, text, filename, progress=gr.Progress()):
        # Compute text embedding
        tensor, length = self.text_encoder(text)
        embedding = tensor[:length]
        embedding = embedding.cpu().numpy()

        # Save text embedding
        path = os.path.join(self.tmp_folder, filename)
        np.save(path, embedding)

        output_title = gr.Markdown(visible=True)
        output_text = gr.Markdown(visible=True, value=f"Text: {text}")
        download = gr.DownloadButton(visible=True, value=path)
        return download, output_title, output_text


def _get_env(name: str, default):
    return os.getenv(name, default)


def _build_text_encoder(name: str, fp32: bool = False):
    if name not in TEXT_ENCODER_PRESETS:
        available = ", ".join(sorted(TEXT_ENCODER_PRESETS))
        raise ValueError(f"Unknown TEXT_ENCODER='{name}'. Available: {available}")
    preset = TEXT_ENCODER_PRESETS[name]
    target_cls = resolve_target(preset["target"])
    if fp32:
        preset["kwargs"]["dtype"] = "float32"
    return target_cls(**preset["kwargs"])


def parse_args():
    parser = argparse.ArgumentParser(description="Run text encoder Gradio server.")
    parser.add_argument(
        "--text-encoder",
        default=_get_env("TEXT_ENCODER", DEFAULT_TEXT_ENCODER),
        choices=sorted(TEXT_ENCODER_PRESETS.keys()),
        help="Text encoder preset.",
    )
    parser.add_argument(
        "--tmp-folder",
        default=_get_env("TEXT_ENCODER_TMP_FOLDER", DEFAULT_TMP_FOLDER),
    )
    parser.add_argument(
        "--fp32",
        action="store_true",
        help="Uses fp32 for the text encoder rather than the default bfloat16.",
    )
    return parser.parse_args()


def main():
    args = parse_args()
    server_name = _get_env("GRADIO_SERVER_NAME", DEFAULT_SERVER_NAME)
    server_port = int(_get_env("GRADIO_SERVER_PORT", DEFAULT_SERVER_PORT))
    theme, css = get_gradio_theme()
    os.makedirs(args.tmp_folder, exist_ok=True)
    text_encoder = _build_text_encoder(args.text_encoder, args.fp32)
    display_name = TEXT_ENCODER_PRESETS[args.text_encoder]["display_name"]
    demo_wrapper_fn = DemoWrapper(text_encoder, args.tmp_folder)

    with gr.Blocks(title="Text encoder", css=css, theme=theme) as demo:
        gr.Markdown(f"# Text encoder: {display_name}")
        gr.Markdown("## Description")
        gr.Markdown("Get an embedding from a text.")
        gr.Markdown("## Inputs")
        with gr.Row():
            text = gr.Textbox(
                placeholder="Type the motion you want to generate with a sentence",
                show_label=True,
                label="Text prompt",
                value=DEFAULT_TEXT,
                type="text",
            )
        with gr.Row(scale=3):
            with gr.Column(scale=1):
                btn = gr.Button("Encode", variant="primary")
            with gr.Column(scale=1):
                clear = gr.Button("Clear", variant="secondary")
            with gr.Column(scale=3):
                pass
        output_title = gr.Markdown("## Outputs", visible=False)
        output_text = gr.Markdown("", visible=False)
        with gr.Row(scale=3):
            with gr.Column(scale=1):
                download =
gr.DownloadButton("Download", variant="primary", visible=False) with gr.Column(scale=4): pass filename = gr.Textbox( visible=False, value="embedding.npy", ) def clear_fn(): return [ gr.DownloadButton(visible=False), gr.Markdown(visible=False), gr.Markdown(visible=False), ] outputs = [download, output_title, output_text] gr.on( triggers=[text.submit, btn.click], fn=clear_fn, inputs=None, outputs=outputs, ).then( fn=demo_wrapper_fn, inputs=[text, filename], outputs=outputs, ) def download_file(): return gr.DownloadButton() download.click( fn=download_file, inputs=None, outputs=[download], ) clear.click(fn=clear_fn, inputs=None, outputs=outputs) demo.launch(server_name=server_name, server_port=server_port) if __name__ == "__main__": main() ================================================ FILE: kimodo/skeleton/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Skeleton definitions and utilities used across kimodo.""" from .base import SkeletonBase from .definitions import ( G1Skeleton34, SMPLXSkeleton22, SOMASkeleton30, SOMASkeleton77, ) from .kinematics import batch_rigid_transform, fk from .registry import build_skeleton from .transforms import global_rots_to_local_rots, to_standard_tpose __all__ = [ "SkeletonBase", "G1Skeleton34", "SOMASkeleton30", "SOMASkeleton77", "SMPLXSkeleton22", "batch_rigid_transform", "fk", "build_skeleton", "global_rots_to_local_rots", "to_standard_tpose", ] ================================================ FILE: kimodo/skeleton/base.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Base skeleton class: hierarchy, joint metadata, and helpers for kinematics and motion.""" from pathlib import Path from typing import Optional import torch from kimodo.assets import skeleton_asset_path from .kinematics import fk from .transforms import ( from_standard_tpose, global_rots_to_local_rots, to_standard_tpose, ) class SkeletonBase(torch.nn.Module): """Base class that stores a skeleton hierarchy and helper metadata. Subclasses define the static joint layout (joint names and parent links) and semantic groups (feet, hands, hips). This class builds index mappings, parent tensors, and convenience helpers used by kinematics, constraints, and motion conversion utilities. """ # these should be defined in the subclass name = None bone_order_names_with_parents = None bone_order_names_no_root = None root_idx = None foot_joint_names = None foot_joint_idx = None hip_joint_names = None # in order [right, left] hip_joint_idx = None # in order [right, left] def __init__( self, folder: Optional[str] = None, name: Optional[str] = None, load: bool = True, **kwargs, # to catch addition args in configs ): """Initialize a skeleton instance and optional neutral-pose assets. Args: folder: Folder containing serialized skeleton assets (for example `joints.p` and optional `standard_t_pose_global_offsets_rots.p`). name: Optional runtime name used to validate subclass compatibility. load: Whether to load tensor assets from `folder`. **kwargs: Unused extra config keys kept for config compatibility. 
""" super().__init__() if name is not None: # Check that the name is not too far from the actual skeleton class name assert self.name in name self.name = name if folder is None: # Take the skeleton asset folder of the repo from the name # in case we don't override it folder = str(skeleton_asset_path(self.name)) self.folder = folder self.dim = len(self.bone_order_names_with_parents) if load and folder is not None: pfolder = Path(folder) neutral_joints = torch.load(pfolder / "joints.p").squeeze() self.register_buffer("neutral_joints", neutral_joints, persistent=False) if (pfolder / "bvh_joints.p").exists(): bvh_neutral_joints = torch.load(pfolder / "bvh_joints.p").squeeze() self.register_buffer("bvh_neutral_joints", bvh_neutral_joints, persistent=False) global_offset_path = pfolder / "standard_t_pose_global_offsets_rots.p" if global_offset_path.exists(): global_rot_offsets = torch.load(global_offset_path).squeeze() self.register_buffer("global_rot_offsets", global_rot_offsets, persistent=False) # Usefull for g1, where the rest pose is not zero baked_rest_path = pfolder / "rest_pose_local_rot.p" if baked_rest_path.exists(): rest_pose_local_rot = torch.load(baked_rest_path).squeeze() self.register_buffer("rest_pose_local_rot", rest_pose_local_rot, persistent=False) self.bone_order_names = [x for x, y in self.bone_order_names_with_parents] self.bone_parents = dict(self.bone_order_names_with_parents) self.bone_index = {x: idx for idx, x in enumerate(self.bone_order_names)} self.bone_order_names_index = self.bone_index # create the parents tensor on the fly joint_parents = torch.tensor( [-1 if (y := self.bone_parents[x]) is None else self.bone_index[y] for x in self.bone_order_names] ) self.register_buffer("joint_parents", joint_parents, persistent=False) self.nbjoints = len(self.bone_order_names) # check lengths assert self.nbjoints == len(self.joint_parents) if "neutral_joints" in self.__dict__: assert self.nbjoints == len(self.neutral_joints) root_indices = torch.where(joint_parents == -1)[0] assert len(root_indices) == 1 # should be one root only self.root_idx = root_indices[0].item() if "neutral_joints" in self.__dict__: assert (self.neutral_joints[0] == 0).all() # remove the root self.bone_order_names_no_root = ( self.bone_order_names[: self.root_idx] + self.bone_order_names[self.root_idx + 1 :] ) self.foot_joint_names = self.left_foot_joint_names + self.right_foot_joint_names self.foot_joint_names_index = {x: idx for idx, x in enumerate(self.foot_joint_names)} self.left_foot_joint_idx = [ self.bone_order_names.index(foot_joint) for foot_joint in self.left_foot_joint_names ] self.right_foot_joint_idx = [ self.bone_order_names.index(foot_joint) for foot_joint in self.right_foot_joint_names ] self.foot_joint_idx = self.left_foot_joint_idx + self.right_foot_joint_idx self.hip_joint_idx = [self.bone_order_names.index(hip_joint) for hip_joint in self.hip_joint_names] def expand_joint_names(self, joint_names): """Expand base EE names [LeftFoot, RightFoot, LeftHand, RightHand] actual joint names to constrain position and rotations. 
    def expand_joint_names(self, joint_names):
        """Expand base EE names [LeftFoot, RightFoot, LeftHand, RightHand, Hips] into the
        actual joint names used to constrain positions and rotations.

        Args:
            joint_names: list of base EE names to constrain

        Returns:
            rot_joint_names: list of joint names to constrain rotations
            pos_joint_names: list of joint names to constrain positions
        """
        base_ee = ["LeftFoot", "RightFoot", "LeftHand", "RightHand", "Hips"]
        pelvis_name = self.bone_order_names[self.root_idx]
        base_pos_names = [
            self.left_foot_joint_names,
            self.right_foot_joint_names,
            self.left_hand_joint_names,
            self.right_hand_joint_names,
            [pelvis_name],
        ]
        # base of each chain
        base_rot_names = [
            self.left_foot_joint_names[:1],
            self.right_foot_joint_names[:1],
            self.left_hand_joint_names[:1],
            self.right_hand_joint_names[:1],
            [pelvis_name],
        ]
        rot_joint_names = []
        pos_joint_names = []
        # loop through each EE joint group to constrain in the current keyframe
        for jname in joint_names:
            idx = base_ee.index(jname)
            rot_joint_names += base_rot_names[idx]
            pos_joint_names += base_pos_names[idx]
        return rot_joint_names, pos_joint_names

    def expand_joint_names_batched(self, joint_names):
        """Expand base EE names [LeftFoot, RightFoot, LeftHand, RightHand, Hips] into the
        actual joint names used to constrain positions and rotations, per keyframe.

        Args:
            joint_names: list of lists of base EE names to constrain (one list per keyframe)

        Returns:
            rot_joint_names: list of lists of joint names to constrain rotations
            pos_joint_names: list of lists of joint names to constrain positions
        """
        base_ee = ["LeftFoot", "RightFoot", "LeftHand", "RightHand", "Hips"]
        pelvis_name = self.bone_order_names[self.root_idx]
        base_pos_names = [
            self.left_foot_joint_names,
            self.right_foot_joint_names,
            self.left_hand_joint_names,
            self.right_hand_joint_names,
            [pelvis_name],
        ]
        # base of each chain
        base_rot_names = [
            self.left_foot_joint_names[:1],
            self.right_foot_joint_names[:1],
            self.left_hand_joint_names[:1],
            self.right_hand_joint_names[:1],
            [pelvis_name],
        ]
        # loop through each keyframe
        rot_joint_names = []
        pos_joint_names = []
        for key_joint_names in joint_names:
            key_rot_names = []
            key_pos_names = []
            # loop through each EE joint group to constrain in the current keyframe
            for jname in key_joint_names:
                idx = base_ee.index(jname)
                key_rot_names += base_rot_names[idx]
                key_pos_names += base_pos_names[idx]
            rot_joint_names.append(key_rot_names)
            pos_joint_names.append(key_pos_names)
        return rot_joint_names, pos_joint_names

    def __repr__(self):
        if self.folder is None:
            return f"{self.__class__.__name__}()"
        return f'{self.__class__.__name__}(folder="{self.folder}")'

    @property
    def device(self):
        """Device where neutral-joint buffers are stored.

        Returns 'cpu' if neutral_joints is not present.
        """
        if getattr(self, "neutral_joints", None) is None:
            return "cpu"
        return self.neutral_joints.device

    def fk(self, local_joint_rots: torch.Tensor, root_positions: torch.Tensor):
        """Run forward kinematics for this skeleton layout.

        Args:
            local_joint_rots: Local joint rotation matrices with shape `(..., J, 3, 3)`.
            root_positions: Root translations with shape `(..., 3)`.

        Returns:
            Tuple of `(global_joint_rots, posed_joints, posed_joints_norootpos)`.
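
        Example (illustrative; ``skel`` stands for any loaded skeleton instance):
            T = 30
            identity = torch.eye(3).expand(T, skel.nbjoints, 3, 3)  # rest-pose rotations
            root = torch.zeros(T, 3)
            global_rots, joints, joints_norootpos = skel.fk(identity, root)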
""" global_joint_rots, posed_joints, posed_joints_norootpos = fk(local_joint_rots, root_positions, self) return global_joint_rots, posed_joints, posed_joints_norootpos def to_standard_tpose(self, local_rot_mats: torch.Tensor): """Convert local rotations into the skeleton's standard T-pose frame.""" return to_standard_tpose(local_rot_mats, self) def from_standard_tpose(self, local_rot_mats: torch.Tensor): """Convert local rotations from the skeleton's standard T-pose frame.""" return from_standard_tpose(local_rot_mats, self) def global_rots_to_local_rots(self, global_joint_rots: torch.Tensor): """Convert global joint rotations to local rotations for this hierarchy.""" return global_rots_to_local_rots(global_joint_rots, self) def get_skel_slice(self, skeleton: "SkeletonBase"): """Build index mapping from another skeleton into this skeleton order. Args: skeleton: Source skeleton whose joint order is used by input tensors. Returns: A list of source indices ordered as `self.bone_order_names`. Raises: ValueError: If at least one required joint is missing from `skeleton`. """ try: skel_slice = [skeleton.bone_index[x] for x in self.bone_order_names] except KeyError: raise ValueError("The current skeleton contain joints that are not in the input") return skel_slice ================================================ FILE: kimodo/skeleton/bvh.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """BVH parsing utilities and skeleton/animation conversion helpers.""" import re from typing import Optional, Tuple import numpy as np import torch from scipy.spatial.transform import Rotation class BvhNode: """Lightweight tree node used to represent parsed BVH hierarchy lines.""" def __init__(self, value=[], parent=None): """Create a node from tokenized BVH line values.""" self.value = value self.children = [] self.parent = parent if self.parent: self.parent.add_child(self) def add_child(self, item): """Attach a child node and set its parent reference.""" item.parent = self self.children.append(item) def filter(self, key): """Yield direct children whose first token matches `key`.""" for child in self.children: if child.value[0] == key: yield child def __iter__(self): for child in self.children: yield child def __getitem__(self, key): """Return all tokens following `key` from the first matching child node.""" for child in self.children: for index, item in enumerate(child.value): if item == key: if index + 1 >= len(child.value): return None else: return child.value[index + 1 :] raise IndexError("key {} not found".format(key)) def __repr__(self): return str(" ".join(self.value)) @property def name(self): """Joint name for `ROOT`/`JOINT` entries.""" return self.value[1] class Bvh: """Parsed BVH file with hierarchy graph and per-frame channel values.""" def __init__(self, data: str, backend: Optional[str] = "graph"): """ Args: data: Raw BVH file content. backend: Parsing mode. `"graph"` keeps list-based frame storage, while `"np"` precomputes a NumPy array and index caches. 
""" self.data = data self.root = BvhNode() self.frames = [] self.backend = backend self.tokenize() if self.backend == "np": # cache important info for quick access later self.build_data_array() elif self.backend == "graph": pass else: raise ValueError(f"Unknown backend for BVH loading: {backend}") def build_data_array(self): """Build cached channel indices and contiguous frame data for `"np"` backend.""" joints = self.get_joints() self.joint2idx = dict() self.joint2channels = dict() cur_idx = 0 for joint in joints: self.joint2idx[joint.value[1]] = cur_idx cur_idx += int(joint["CHANNELS"][0]) self.joint2channels[joint.value[1]] = joint["CHANNELS"][1:] self.np_data_array = np.array(self.frames, dtype=np.float32) def tokenize(self): """Tokenize BVH text and populate hierarchy plus frame values.""" first_round = [] accumulator = "" for char in self.data: if char not in ("\n", "\r"): accumulator += char elif accumulator: first_round.append(re.split("\\s+", accumulator.strip())) accumulator = "" node_stack = [self.root] frame_time_found = False node = None for item in first_round: if frame_time_found: self.frames.append(item) continue key = item[0] if key == "{": node_stack.append(node) elif key == "}": node_stack.pop() else: node = BvhNode(item) # print("new node: ", node, "\nparent: ", node_stack[-1]) node_stack[-1].add_child(node) if item[0] == "Frame" and item[1] == "Time:": frame_time_found = True def search(self, *items): """Depth-first search for nodes matching a prefix of tokens.""" found_nodes = [] def check_children(node): if len(node.value) >= len(items): failed = False for index, item in enumerate(items): if node.value[index] != item: failed = True break if not failed: found_nodes.append(node) for child in node: check_children(child) check_children(self.root) return found_nodes def get_joints(self): """Return all `ROOT`/`JOINT` hierarchy joints in BVH traversal order.""" joints = [] def iterate_joints(joint): joints.append(joint) for child in joint.filter("JOINT"): iterate_joints(child) iterate_joints(next(self.root.filter("ROOT"))) return joints def get_joints_names(self): """Return joint names in the same order as :meth:`get_joints`.""" joints = [] def iterate_joints(joint): joints.append(joint.value[1]) for child in joint.filter("JOINT"): iterate_joints(child) iterate_joints(next(self.root.filter("ROOT"))) return joints def joint_direct_children(self, name): """Return direct child joints of the given joint name.""" joint = self.get_joint(name) return [child for child in joint.filter("JOINT")] def get_joint_index(self, name): """Return hierarchy index of the named joint.""" return self.get_joints().index(self.get_joint(name)) def get_joint(self, name): """Return hierarchy node for a joint name.""" found = self.search("ROOT", name) if not found: found = self.search("JOINT", name) if found: return found[0] raise LookupError("joint not found") def joint_offset(self, name, idx=[0, 1, 2]): """Return selected `OFFSET` components for a joint.""" joint = self.get_joint(name) offset = joint["OFFSET"] if len(offset) < max(idx): return None return (float(offset[idx[0]]), float(offset[idx[1]]), float(offset[idx[2]])) def joint_offset_rot(self, name): """Return optional rotational offset components from custom BVH files.""" return self.joint_offset(name, idx=[3, 4, 5]) def joint_channels(self, name): """Return channel names declared for a joint.""" if self.backend == "np": return self.joint2channels[name] else: joint = self.get_joint(name) return joint["CHANNELS"][1:] def 
    def get_joint_channels_index(self, joint_name):
        """Return the flattened starting channel index for one joint."""
        if self.backend == "np":
            return self.joint2idx[joint_name]
        else:
            index = 0
            for joint in self.get_joints():
                if joint.value[1] == joint_name:
                    return index
                index += int(joint["CHANNELS"][0])
            raise LookupError("joint not found")

    def get_joint_channel_index(self, joint, channel):
        """Return per-joint channel offset for a specific channel name."""
        channels = self.joint_channels(joint)
        if channel in channels:
            channel_index = channels.index(channel)
        else:
            raise ValueError(f"Channel {channel} not found in {channels}")
        return channel_index

    def frame_joint_channel(self, frame_index, joint, channel, value=None):
        """Return one channel value for one joint at one frame index."""
        joint_index = self.get_joint_channels_index(joint)
        channel_index = self.get_joint_channel_index(joint, channel)
        if channel_index == -1 and value is not None:
            return value
        if self.backend == "np":
            return self.np_data_array[frame_index, joint_index + channel_index]
        else:
            return float(self.frames[frame_index][joint_index + channel_index])

    def frame_joint_channels(self, frame_index, joint, channels, value=None):
        """Get single-frame data for one specific joint from multiple specific
        channels (e.g. Xrotation, Yrotation, Zrotation)."""
        values = []
        joint_index = self.get_joint_channels_index(joint)
        if self.backend == "np":
            channel_idx = [self.get_joint_channel_index(joint, channel) for channel in channels]
            channel_idx = np.array(channel_idx) + joint_index
            values = self.np_data_array[frame_index, channel_idx]
        else:
            for channel in channels:
                channel_index = self.get_joint_channel_index(joint, channel)
                if channel_index == -1 and value is not None:
                    values.append(value)
                else:
                    values.append(float(self.frames[frame_index][joint_index + channel_index]))
        return values

    def frames_joint_channels(self, joint, channels, value=None):
        """Get all frame data for one joint from multiple channels (e.g.
Xrotation, Yrotation, Zrotation).""" joint_index = self.get_joint_channels_index(joint) if self.backend == "np": channel_idx = [self.get_joint_channel_index(joint, channel) for channel in channels] channel_idx = np.array(channel_idx) + joint_index all_frames = self.np_data_array[:, channel_idx] else: all_frames = [] for frame in self.frames: values = [] for channel in channels: channel_index = self.get_joint_channel_index(joint, channel) if channel_index == -1 and value is not None: values.append(value) else: values.append(float(frame[joint_index + channel_index])) all_frames.append(values) return all_frames def frames_joints_channels(self, joint_names, channels): """Get all frames for all specified joints with one specified set of channels.""" if self.backend != "np": raise NotImplementedError("Only np backend is supported for this function") joint_indices = [(joint_name, self.joint2idx[joint_name]) for joint_name in joint_names] data_indices = [] for joint_name, joint_idx in joint_indices: channel_indices = [self.get_joint_channel_index(joint_name, channel) for channel in channels] data_indices.extend([joint_idx + channel_idx for channel_idx in channel_indices]) all_frames = self.np_data_array[:, data_indices] all_frames = all_frames.reshape(-1, len(joint_names), len(channels)) return all_frames def joint_parent(self, name): """Return parent joint node, or `None` for the root.""" joint = self.get_joint(name) if joint.parent == self.root: return None return joint.parent def joint_parent_index(self, name): """Return parent joint index, or `-1` for the root.""" joint = self.get_joint(name) if joint.parent == self.root: return -1 return self.get_joints().index(joint.parent) @property def nframes(self): """Number of motion frames declared in the BVH header.""" try: return int(next(self.root.filter("Frames:")).value[1]) except StopIteration: raise LookupError("number of frames not found") @property def frame_time(self): """Frame duration in seconds declared in the BVH header.""" try: return float(next(self.root.filter("Frame")).value[2]) except StopIteration: raise LookupError("frame time not found") class Bone: """Container for one skeleton bone and its kinematic metadata.""" def __init__(self): # original bone info self.id = None self.name = None self.orient = np.identity(3) self.dof_index = [] self.channels = [] # bvh only self.lb = [] self.ub = [] self.parent = None self.child = [] # asf specific self.dir = np.zeros(3) self.len = 0 # bvh specific self.offset = np.zeros(3) # default offset for position self.offset_rot = None # rotation for custom nv bvh # inferred info self.pos = np.zeros(3) self.end = np.zeros(3) def __repr__(self): return f"{self.name}" class SkeletonBvh: """Skeleton structure reconstructed from BVH hierarchy metadata.""" def __init__(self): self.bones = [] self.name2bone = {} self.mass_scale = 1.0 self.len_scale = 1.0 self.dof_name = ["x", "y", "z"] self.root = None def get_bones_names(self): """Return bone names in skeleton order.""" return [x.name for x in self.bones] def get_parent_indices(self): """Return parent index array aligned with `self.bones`.""" parent_indices = [-1] * len(self.bones) for bone in self.bones: if bone.parent: parent_indices[bone.id] = bone.parent.id return parent_indices def get_neutral_joints(self): """Return neutral/rest joint positions as a NumPy array `(J, 3)`.""" joints = [] for bone in self.bones: joints.append(bone.pos) joints = np.stack(joints, axis=0) return joints def load_from_bvh(self, fname, exclude_bones=None, spec_channels=None, 
mocap=None): """Load skeleton hierarchy and rest offsets from a BVH file. Args: fname: Path to a BVH file (ignored when *mocap* is given). exclude_bones: Bone-name substrings to ignore while constructing the skeleton. spec_channels: Optional per-joint channel overrides. mocap: Pre-parsed :class:`Bvh` object. When provided the file is not re-read from disk. """ if exclude_bones is None: exclude_bones = {} if spec_channels is None: spec_channels = dict() if mocap is None: with open(fname) as f: mocap = Bvh(f.read()) joint_names = list( filter( lambda x: all([t not in x for t in exclude_bones]), mocap.get_joints_names(), ) ) dof_ind = {"x": 0, "y": 1, "z": 2} self.len_scale = 1.0 self.root = Bone() self.root.id = 0 self.root.name = joint_names[0] self.root.channels = mocap.joint_channels(self.root.name) self.root.offset = np.array(mocap.joint_offset(self.root.name)) * self.len_scale self.root.offset_rot = mocap.joint_offset_rot(self.root.name) if self.root.offset_rot is not None: self.root.offset_rot = np.array(self.root.offset_rot) # self.root.offset = np.zeros_like(self.root.offset) # TODO: remove this self.name2bone[self.root.name] = self.root self.bones.append(self.root) for i, joint in enumerate(joint_names[1:]): bone = Bone() bone.id = i + 1 bone.name = joint bone.channels = spec_channels[joint] if joint in spec_channels.keys() else mocap.joint_channels(joint) bone.dof_index = [dof_ind[x[0].lower()] for x in bone.channels] bone.offset = np.array(mocap.joint_offset(joint)) * self.len_scale bone.offset_rot = mocap.joint_offset_rot(joint) if bone.offset_rot is not None: bone.offset_rot = np.array(bone.offset_rot) bone.lb = [-180.0] * 3 bone.ub = [180.0] * 3 self.bones.append(bone) self.name2bone[joint] = bone # for bone in self.bones: # print(bone.name, bone.channels, bone.offset) for bone in self.bones[1:]: parent_name = mocap.joint_parent(bone.name).name if parent_name in self.name2bone.keys(): bone_p = self.name2bone[parent_name] bone_p.child.append(bone) bone.parent = bone_p self.forward_bvh(self.root) for bone in self.bones: if len(bone.child) == 0: child_vals = [str(node) for node in mocap.get_joint(bone.name).children] if "End Site" in child_vals: end_site_idx = child_vals.index("End Site") end_site_offset = mocap.get_joint(bone.name).children[end_site_idx]["OFFSET"] bone.end = bone.pos + np.array([float(x) for x in end_site_offset]) * self.len_scale else: pass else: bone.end = sum([bone_c.pos for bone_c in bone.child]) / len(bone.child) def forward_bvh(self, bone): """Recursively accumulate absolute joint positions from local offsets.""" if bone.parent: bone.pos = bone.parent.pos + bone.offset else: bone.pos = bone.offset for bone_c in bone.child: self.forward_bvh(bone_c) def load_bvh_animation( fname: str, skeleton: SkeletonBvh, rot_order: Optional[str] = "native", backend: Optional[str] = "np", return_quat: Optional[bool] = False, mocap: Optional["Bvh"] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: """Load motion channels from BVH into root translations and joint rotations. Args: fname: Full path to the BVH file (ignored when *mocap* is given). skeleton: Parsed neutral skeleton built from compatible BVH hierarchy. rot_order: Euler order to use for conversion (`"native"` keeps BVH order). backend: BVH parser backend (`"np"` or `"graph"`). return_quat: If `True`, return quaternions instead of rotation matrices. mocap: Pre-parsed :class:`Bvh` object. When provided the file is not re-read from disk. 
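
    Note:
        All joints are assumed to share one rotation-channel ordering; the
        `"graph"` backend asserts this per joint, while the `"np"` backend
        assumes it without checking.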
Returns: Root translations `(T, 3)` and joint rotations `(T, J, 3, 3)` or `(T, J, 4)` when `return_quat=True`. """ if mocap is None: with open(fname) as f: mocap = Bvh(f.read(), backend=backend) # assume all joints are same ordering, load in with native ordering root_channels = mocap.joint_channels(skeleton.root.name) pos_channels = [channel for channel in root_channels if channel.endswith("position")] rot_channels = [channel for channel in root_channels if channel.endswith("rotation")] root_trans = np.array(mocap.frames_joint_channels(skeleton.root.name, pos_channels)) effective_backend = mocap.backend if effective_backend == "np": # NOTE: assumes rot channel ordering is the same for all joints joint_eulers = mocap.frames_joints_channels(skeleton.get_bones_names(), rot_channels) joint_eulers = np.deg2rad(joint_eulers) elif effective_backend == "graph": joint_eulers = [] for bone in skeleton.bones: bone_channels = mocap.joint_channels(bone.name) bone_rot_channels = [channel for channel in bone_channels if channel.endswith("rotation")] assert bone_rot_channels == rot_channels, "Rotation channel ordering is not consistent across joints!" # use native rotation order euler = np.deg2rad(np.array(mocap.frames_joint_channels(bone.name, rot_channels))) joint_eulers.append(euler) joint_eulers = np.stack(joint_eulers, axis=1) else: raise ValueError(f"Unknown backend for BVH loading: {effective_backend}") if rot_order == "native": rot_order = "" for axis in rot_channels: rot_order += axis[0] else: # need to reorder dims ordered_joint_eulers = [] for axis in rot_order: i = rot_channels.index(axis + "rotation") ordered_joint_eulers.append(joint_eulers[..., i]) joint_eulers = np.stack(ordered_joint_eulers, axis=-1) rotations = Rotation.from_euler(rot_order, joint_eulers.reshape(-1, 3)) if return_quat: joint_rots = rotations.as_quat(scalar_first=True).reshape(joint_eulers.shape[:-1] + (4,)) else: joint_rots = rotations.as_matrix().reshape(joint_eulers.shape[:-1] + (3, 3)) return root_trans, joint_rots def parse_bvh_motion(file_path_input: str, parse_neutral_joints: bool = False): """Parse a BVH motion into tensors used by kimodo motion pipelines. Args: file_path_input: Path to input BVH file. parse_neutral_joints: If `True`, also return neutral joints in meters. Returns: ``(local_rot_mats, root_trans, fps)`` or ``(local_rot_mats, root_trans, fps, neutral_joints)`` when requested. 
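
    Example:
        A minimal usage sketch; the file path is illustrative::

            local_rot_mats, root_trans, fps = parse_bvh_motion("clip.bvh")
            # local_rot_mats: (T, J, 3, 3) rotation matrices
            # root_trans: (T, 3) root translations in meters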
""" with open(file_path_input) as f: mocap = Bvh(f.read(), backend="np") fps = 1.0 / mocap.frame_time skeletonBVH = SkeletonBvh() exclude_bones = {"Root"} skeletonBVH.load_from_bvh(file_path_input, exclude_bones=exclude_bones, mocap=mocap) root_trans, local_rot_mats = load_bvh_animation(file_path_input, skeletonBVH, mocap=mocap) root_trans *= 0.01 # unit change: cm -> m root_trans = torch.tensor(root_trans) local_rot_mats = torch.tensor(local_rot_mats) # Don't parse neutral_joints here # it is not actually needed right now: # the skeleton is always the same, and saved in the folder # carefull: the one saved in the folder it relative to the standard t_pose # whereas the parsed one is not if not parse_neutral_joints: return local_rot_mats, root_trans, fps neutral_joints = skeletonBVH.get_neutral_joints() neutral_joints *= 0.01 # unit change: cm -> m # remove the root position of the skeleton # (it is already "included" in the root_translation) root_idx = 0 neutral_joints = torch.tensor(neutral_joints - neutral_joints[root_idx]) return local_rot_mats, root_trans, fps, neutral_joints ================================================ FILE: kimodo/skeleton/definitions.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Concrete skeleton definitions: SOMA, G1, SMPLX with joint names and hierarchy.""" from pathlib import Path import numpy as np import torch from ..tools import ensure_batched from .base import SkeletonBase class SOMASkeleton77(SkeletonBase): """High-detail 77-joint SOMA skeleton with full finger and toe chains.""" name = "somaskel77" right_foot_joint_names = [ "RightFoot", "RightToeBase", "RightToeEnd", ] # in order of chain left_foot_joint_names = [ "LeftFoot", "LeftToeBase", "LeftToeEnd", ] # in order of chain right_hand_joint_names = [ "RightHand", "RightHandThumb1", "RightHandThumb2", "RightHandThumb3", "RightHandThumbEnd", "RightHandIndex1", "RightHandIndex2", "RightHandIndex3", "RightHandIndex4", "RightHandIndexEnd", "RightHandMiddle1", "RightHandMiddle2", "RightHandMiddle3", "RightHandMiddle4", "RightHandMiddleEnd", "RightHandRing1", "RightHandRing2", "RightHandRing3", "RightHandRing4", "RightHandRingEnd", "RightHandPinky1", "RightHandPinky2", "RightHandPinky3", "RightHandPinky4", "RightHandPinkyEnd", ] # in order of chain left_hand_joint_names = [ "LeftHand", "LeftHandThumb1", "LeftHandThumb2", "LeftHandThumb3", "LeftHandThumbEnd", "LeftHandIndex1", "LeftHandIndex2", "LeftHandIndex3", "LeftHandIndex4", "LeftHandIndexEnd", "LeftHandMiddle1", "LeftHandMiddle2", "LeftHandMiddle3", "LeftHandMiddle4", "LeftHandMiddleEnd", "LeftHandRing1", "LeftHandRing2", "LeftHandRing3", "LeftHandRing4", "LeftHandRingEnd", "LeftHandPinky1", "LeftHandPinky2", "LeftHandPinky3", "LeftHandPinky4", "LeftHandPinkyEnd", ] # in order of chain hip_joint_names = ["RightLeg", "LeftLeg"] # in order [right, left] bone_order_names_with_parents = [ ("Hips", None), ("Spine1", "Hips"), ("Spine2", "Spine1"), ("Chest", "Spine2"), ("Neck1", "Chest"), ("Neck2", "Neck1"), ("Head", "Neck2"), ("HeadEnd", "Head"), ("Jaw", "Head"), ("LeftEye", "Head"), ("RightEye", "Head"), ("LeftShoulder", "Chest"), ("LeftArm", "LeftShoulder"), ("LeftForeArm", "LeftArm"), ("LeftHand", "LeftForeArm"), ("LeftHandThumb1", "LeftHand"), ("LeftHandThumb2", "LeftHandThumb1"), ("LeftHandThumb3", "LeftHandThumb2"), ("LeftHandThumbEnd", "LeftHandThumb3"), ("LeftHandIndex1", "LeftHand"), ("LeftHandIndex2", 
"LeftHandIndex1"), ("LeftHandIndex3", "LeftHandIndex2"), ("LeftHandIndex4", "LeftHandIndex3"), ("LeftHandIndexEnd", "LeftHandIndex4"), ("LeftHandMiddle1", "LeftHand"), ("LeftHandMiddle2", "LeftHandMiddle1"), ("LeftHandMiddle3", "LeftHandMiddle2"), ("LeftHandMiddle4", "LeftHandMiddle3"), ("LeftHandMiddleEnd", "LeftHandMiddle4"), ("LeftHandRing1", "LeftHand"), ("LeftHandRing2", "LeftHandRing1"), ("LeftHandRing3", "LeftHandRing2"), ("LeftHandRing4", "LeftHandRing3"), ("LeftHandRingEnd", "LeftHandRing4"), ("LeftHandPinky1", "LeftHand"), ("LeftHandPinky2", "LeftHandPinky1"), ("LeftHandPinky3", "LeftHandPinky2"), ("LeftHandPinky4", "LeftHandPinky3"), ("LeftHandPinkyEnd", "LeftHandPinky4"), ("RightShoulder", "Chest"), ("RightArm", "RightShoulder"), ("RightForeArm", "RightArm"), ("RightHand", "RightForeArm"), ("RightHandThumb1", "RightHand"), ("RightHandThumb2", "RightHandThumb1"), ("RightHandThumb3", "RightHandThumb2"), ("RightHandThumbEnd", "RightHandThumb3"), ("RightHandIndex1", "RightHand"), ("RightHandIndex2", "RightHandIndex1"), ("RightHandIndex3", "RightHandIndex2"), ("RightHandIndex4", "RightHandIndex3"), ("RightHandIndexEnd", "RightHandIndex4"), ("RightHandMiddle1", "RightHand"), ("RightHandMiddle2", "RightHandMiddle1"), ("RightHandMiddle3", "RightHandMiddle2"), ("RightHandMiddle4", "RightHandMiddle3"), ("RightHandMiddleEnd", "RightHandMiddle4"), ("RightHandRing1", "RightHand"), ("RightHandRing2", "RightHandRing1"), ("RightHandRing3", "RightHandRing2"), ("RightHandRing4", "RightHandRing3"), ("RightHandRingEnd", "RightHandRing4"), ("RightHandPinky1", "RightHand"), ("RightHandPinky2", "RightHandPinky1"), ("RightHandPinky3", "RightHandPinky2"), ("RightHandPinky4", "RightHandPinky3"), ("RightHandPinkyEnd", "RightHandPinky4"), ("LeftLeg", "Hips"), ("LeftShin", "LeftLeg"), ("LeftFoot", "LeftShin"), ("LeftToeBase", "LeftFoot"), ("LeftToeEnd", "LeftToeBase"), ("RightLeg", "Hips"), ("RightShin", "RightLeg"), ("RightFoot", "RightShin"), ("RightToeBase", "RightFoot"), ("RightToeEnd", "RightToeBase"), ] @property def relaxed_hands_rest_pose(self): # lazy loading if hasattr(self, "_relaxed_hands_rest_pose"): return self._relaxed_hands_rest_pose relaxed_hands_pose_path = Path(self.folder) / "relaxed_hands_rest_pose.npy" relaxed_hands_rest_pose = torch.from_numpy(np.load(relaxed_hands_pose_path)).squeeze() self.register_buffer( "_relaxed_hands_rest_pose", relaxed_hands_rest_pose, persistent=False, ) return self._relaxed_hands_rest_pose class SOMASkeleton30(SkeletonBase): """Compact 30-joint SOMA variant with reduced hand and end-effector detail.""" name = "somaskel30" right_foot_joint_names = [ "RightFoot", "RightToeBase", ] # in order of chain left_foot_joint_names = [ "LeftFoot", "LeftToeBase", ] # in order of chain right_hand_joint_names = [ "RightHand", "RightHandMiddleEnd", ] # in order of chain left_hand_joint_names = [ "LeftHand", "LeftHandMiddleEnd", ] # in order of chain hip_joint_names = ["RightLeg", "LeftLeg"] # in order [right, left] bone_order_names_with_parents = [ ("Hips", None), ("Spine1", "Hips"), ("Spine2", "Spine1"), ("Chest", "Spine2"), ("Neck1", "Chest"), ("Neck2", "Neck1"), ("Head", "Neck2"), ("Jaw", "Head"), ("LeftEye", "Head"), ("RightEye", "Head"), ("LeftShoulder", "Chest"), ("LeftArm", "LeftShoulder"), ("LeftForeArm", "LeftArm"), ("LeftHand", "LeftForeArm"), ("LeftHandThumbEnd", "LeftHand"), ("LeftHandMiddleEnd", "LeftHand"), ("RightShoulder", "Chest"), ("RightArm", "RightShoulder"), ("RightForeArm", "RightArm"), ("RightHand", "RightForeArm"), ("RightHandThumbEnd", 
"RightHand"), ("RightHandMiddleEnd", "RightHand"), ("LeftLeg", "Hips"), ("LeftShin", "LeftLeg"), ("LeftFoot", "LeftShin"), ("LeftToeBase", "LeftFoot"), ("RightLeg", "Hips"), ("RightShin", "RightLeg"), ("RightFoot", "RightShin"), ("RightToeBase", "RightFoot"), ] @property def somaskel77(self): # lazy loading if not hasattr(self, "_somaskel77"): self._somaskel77 = SOMASkeleton77() return self._somaskel77 @ensure_batched(local_joint_rots_subset=4) def to_SOMASkeleton77(self, local_joint_rots_subset: torch.Tensor): # Converting from 30-joint to 77-joint to have relaxed hands device = local_joint_rots_subset.device nF = len(local_joint_rots_subset) local_joint_rots_mats = self.somaskel77.relaxed_hands_rest_pose.clone().to(device).repeat(nF, 1, 1, 1) skel_slice = self.get_skel_slice(self.somaskel77) local_joint_rots_mats[:, skel_slice] = local_joint_rots_subset return local_joint_rots_mats @ensure_batched(local_joint_rots_full=4) # [BT, J, 3, 3] def from_SOMASkeleton77(self, local_joint_rots_full: torch.Tensor) -> torch.Tensor: """Extract the 30-joint subset from 77-joint local rotation data.""" skel_slice = self.get_skel_slice(self.somaskel77) return local_joint_rots_full[:, skel_slice] def output_to_SOMASkeleton77(self, output: dict) -> dict: """Convert model output dict from somaskel30 to somaskel77. Expands local_rot_mats to 77 joints, re-runs FK for global_rot_mats and posed_joints. Foot contacts are expanded from 4 channels to 6 (toe-end copies toe-base contact). """ local_rot_mats_77 = self.to_SOMASkeleton77(output["local_rot_mats"]) root_positions = output["root_positions"] global_rot_mats_77, posed_joints_77, _ = self.somaskel77.fk(local_rot_mats_77, root_positions) out_77 = dict(output) out_77["local_rot_mats"] = local_rot_mats_77 out_77["global_rot_mats"] = global_rot_mats_77 out_77["posed_joints"] = posed_joints_77 if "foot_contacts" in output: fc = output["foot_contacts"] # [..., 4]: [L_heel, L_toe, R_heel, R_toe] # -> [..., 6]: [L_heel, L_toe, L_toe_end, R_heel, R_toe, R_toe_end] out_77["foot_contacts"] = torch.cat([fc[..., :2], fc[..., 1:2], fc[..., 2:4], fc[..., 3:4]], dim=-1) return out_77 class G1Skeleton34(SkeletonBase): """Unitree G1 skeleton with 32 articulated joints plus 2 toe endpoints.""" name = "g1skel34" right_foot_joint_names = ["right_ankle_roll_skel", "right_toe_base"] left_foot_joint_names = ["left_ankle_roll_skel", "left_toe_base"] right_hand_joint_names = ["right_wrist_yaw_skel", "right_hand_roll_skel"] left_hand_joint_names = ["left_wrist_yaw_skel", "left_hand_roll_skel"] hip_joint_names = [ "right_hip_pitch_skel", "left_hip_pitch_skel", ] # used to calculate root orientation, only need 1 pair of hip joints bone_order_names_with_parents = [ ("pelvis_skel", None), ("left_hip_pitch_skel", "pelvis_skel"), ("left_hip_roll_skel", "left_hip_pitch_skel"), ("left_hip_yaw_skel", "left_hip_roll_skel"), ("left_knee_skel", "left_hip_yaw_skel"), ("left_ankle_pitch_skel", "left_knee_skel"), ("left_ankle_roll_skel", "left_ankle_pitch_skel"), ("left_toe_base", "left_ankle_roll_skel"), ("right_hip_pitch_skel", "pelvis_skel"), ("right_hip_roll_skel", "right_hip_pitch_skel"), ("right_hip_yaw_skel", "right_hip_roll_skel"), ("right_knee_skel", "right_hip_yaw_skel"), ("right_ankle_pitch_skel", "right_knee_skel"), ("right_ankle_roll_skel", "right_ankle_pitch_skel"), ("right_toe_base", "right_ankle_roll_skel"), ("waist_yaw_skel", "pelvis_skel"), ("waist_roll_skel", "waist_yaw_skel"), ("waist_pitch_skel", "waist_roll_skel"), ("left_shoulder_pitch_skel", "waist_pitch_skel"), 
("left_shoulder_roll_skel", "left_shoulder_pitch_skel"), ("left_shoulder_yaw_skel", "left_shoulder_roll_skel"), ("left_elbow_skel", "left_shoulder_yaw_skel"), ("left_wrist_roll_skel", "left_elbow_skel"), ("left_wrist_pitch_skel", "left_wrist_roll_skel"), ("left_wrist_yaw_skel", "left_wrist_pitch_skel"), ("left_hand_roll_skel", "left_wrist_yaw_skel"), ("right_shoulder_pitch_skel", "waist_pitch_skel"), ("right_shoulder_roll_skel", "right_shoulder_pitch_skel"), ("right_shoulder_yaw_skel", "right_shoulder_roll_skel"), ("right_elbow_skel", "right_shoulder_yaw_skel"), ("right_wrist_roll_skel", "right_elbow_skel"), ("right_wrist_pitch_skel", "right_wrist_roll_skel"), ("right_wrist_yaw_skel", "right_wrist_pitch_skel"), ("right_hand_roll_skel", "right_wrist_yaw_skel"), ] class SMPLXSkeleton22(SkeletonBase): """SMPL-X skeleton with body-only 22 joints.""" name = "smplx22" right_foot_joint_names = ["right_ankle", "right_foot"] # in order of chain left_foot_joint_names = ["left_ankle", "left_foot"] # in order of chain right_hand_joint_names = ["right_wrist"] # in order of chain left_hand_joint_names = ["left_wrist"] # in order of chain hip_joint_names = ["right_hip", "left_hip"] # in order [right, left] bone_order_names_with_parents = [ ("pelvis", None), ("left_hip", "pelvis"), ("right_hip", "pelvis"), ("spine1", "pelvis"), ("left_knee", "left_hip"), ("right_knee", "right_hip"), ("spine2", "spine1"), ("left_ankle", "left_knee"), ("right_ankle", "right_knee"), ("spine3", "spine2"), ("left_foot", "left_ankle"), ("right_foot", "right_ankle"), ("neck", "spine3"), ("left_collar", "spine3"), ("right_collar", "spine3"), ("head", "neck"), ("left_shoulder", "left_collar"), ("right_shoulder", "right_collar"), ("left_elbow", "left_shoulder"), ("right_elbow", "right_shoulder"), ("left_wrist", "left_elbow"), ("right_wrist", "right_elbow"), ] ================================================ FILE: kimodo/skeleton/kinematics.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Forward-kinematics primitives for articulated skeletons.""" from typing import List import einops import torch import torch.nn.functional as F from ..tools import ensure_batched @ensure_batched(local_joint_rots=4, root_positions=2) def fk( local_joint_rots: torch.Tensor, root_positions: torch.Tensor, skeleton, root_positions_is_global: bool = True, ): """Compute global joint rotations and positions from local rotations. Args: local_joint_rots: Local rotation matrices with shape `(..., J, 3, 3)`. root_positions: Root translations with shape `(..., 3)`. skeleton: Skeleton object exposing `neutral_joints`, `joint_parents`, and `root_idx`. root_positions_is_global: If `True`, neutral joints are recentered so root translations are interpreted in world space. Returns: Tuple `(global_joint_rots, posed_joints, posed_joints_norootpos)`. """ device = local_joint_rots.device dtype = local_joint_rots.dtype # If skeleton has baked rest (e.g. from XML), identity local = baked rest pose. # So training/inference local rotations are in reference to XML rest *orientations*. rest_local = getattr(skeleton, "rest_local_rots", None) if rest_local is not None: rest_local = rest_local.to(device=device, dtype=dtype) local_joint_rots = torch.einsum("jmn,...jno->...jmo", rest_local, local_joint_rots) # Rest positions for FK. 
    # Must be consistent with rest_local: when local = identity,
    # FK(rest_local, neutral_joints) should equal the XML rest pose positions. So
    # neutral_joints are not necessarily the raw XML joint positions; they are the
    # rest layout that, when rotated by rest_local, yields the XML rest positions.
    neutral_joints = skeleton.neutral_joints.to(device=device, dtype=dtype)

    if root_positions_is_global is True:
        # Removing the pelvis offset from the neutral joints,
        # as the root positions do not depend on the pelvis offset of the skeleton
        pelvis_offset = neutral_joints[skeleton.root_idx]
        neutral_joints = neutral_joints - pelvis_offset

    # compute joint position and global rotations
    joints = einops.repeat(
        neutral_joints,
        "j k -> b j k",
        b=len(local_joint_rots),
    )
    posed_joints_norootpos, global_joint_rots = batch_rigid_transform(
        local_joint_rots,
        joints,
        skeleton.joint_parents,
        skeleton.root_idx,
    )

    # if root_positions_is_global is True:
    # posed_joints_norootpos always starts at zero,
    # otherwise it could start with the pelvis offset
    posed_joints = posed_joints_norootpos + root_positions[:, None]

    return global_joint_rots, posed_joints, posed_joints_norootpos


def compute_idx_levels(parents):
    """Group joint indices by hierarchy depth for level-wise FK updates.

    Args:
        parents: Parent index tensor of shape `(J,)` with root parent `-1`.

    Returns:
        List of index tensors, where each tensor contains joints at one depth.
    """
    idx_levs = [[]]
    lev_dicts = {0: -1}
    for i in range(1, parents.shape[0]):
        assert int(parents[i]) in lev_dicts
        lev = lev_dicts[int(parents[i])] + 1
        if lev + 1 > len(idx_levs):
            idx_levs.append([])
        idx_levs[lev].append(int(i))
        lev_dicts[int(i)] = lev
    idx_levs = [torch.tensor(x).long() for x in idx_levs]
    return idx_levs


def batch_rigid_transform(rot_mats, joints, parents, root_idx):
    """Perform batch rigid transformation on a skeletal structure.

    Args:
        rot_mats: Local rotation matrices for each joint: (B, J, 3, 3)
        joints: Initial joint positions: (B, J, 3)
        parents: Tensor indicating the parent of each joint: (J,)
        root_idx (int): index of the root

    Returns:
        Posed joint positions and global rotation matrices after applying
        forward kinematics.
    """
    # Compute the hierarchical levels of joints based on their parent relationships
    idx_levs = compute_idx_levels(parents)

    # Apply forward kinematics to transform the joints
    return forward_kinematics(rot_mats, joints, parents, idx_levs, root_idx)


@torch.jit.script
def transform_mat(R, t):
    """Creates a batch of transformation matrices.

    Args:
        - R: Bx3x3 array of a batch of rotation matrices
        - t: Bx3x1 array of a batch of translation vectors

    Returns:
        - T: Bx4x4 Transformation matrix
    """
    # No padding left or right, only add an extra row
    return torch.cat([F.pad(R, [0, 0, 0, 1]), F.pad(t, [0, 0, 0, 1], value=1.0)], dim=2)


@torch.jit.script
def forward_kinematics(
    rot_mats,
    joints,
    parents: torch.Tensor,
    idx_levs: List[torch.Tensor],
    root_idx: int,
):
    """Perform forward kinematics to compute posed joints and global rotation matrices.

    Args:
        rot_mats: Local rotation matrices for each joint: (B, J, 3, 3)
        joints: Initial joint positions: (B, J, 3)
        parents: Tensor indicating the parent of each joint: (J,)
        idx_levs: Tensors of joint indices grouped by depth in the kinematic tree.
root_idx (int): index of the root Returns: Posed joints: (B, J, 3) Global rotation matrices: (B, J, 3, 3) """ # Add an extra dimension to joints joints = torch.unsqueeze(joints, dim=-1) # Compute relative joint positions rel_joints = joints.clone() mask_no_root = torch.ones(joints.shape[1], dtype=torch.bool) mask_no_root[root_idx] = False rel_joints[:, mask_no_root] -= joints[:, parents[mask_no_root]].clone() # Compute initial transformation matrices # (B, J + 1, 4, 4) transforms_mat = transform_mat(rot_mats.reshape(-1, 3, 3), rel_joints.reshape(-1, 3, 1)).reshape( -1, joints.shape[1], 4, 4 ) # Initialize the root transformation matrices transforms = torch.zeros_like(transforms_mat) transforms[:, root_idx] = transforms_mat[:, root_idx] # Compute global transformations level by level for indices in idx_levs: curr_res = torch.matmul(transforms[:, parents[indices]], transforms_mat[:, indices]) transforms[:, indices] = curr_res # Extract posed joint positions from the transformation matrices posed_joints = transforms[:, :, :3, 3] # Extract global rotation matrices from the transformation matrices global_rot_mat = transforms[:, :, :3, :3] return posed_joints, global_rot_mat ================================================ FILE: kimodo/skeleton/registry.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Factory helpers for building predefined skeleton variants.""" from pathlib import Path from kimodo.assets import SKELETONS_ROOT from .definitions import ( G1Skeleton34, SMPLXSkeleton22, SOMASkeleton30, SOMASkeleton77, ) def build_skeleton(nbjoints: int, assets_folder: str | Path = SKELETONS_ROOT): """Instantiate a known skeleton class from its joint count. Supported joint counts: 30 (SOMA compact), 34 (G1), 77 (SOMA full), 22 (SMPLX). Args: nbjoints: Number of joints expected in the skeleton representation. assets_folder: Base skeleton-assets directory containing per-skeleton subfolders. Returns: A configured `SkeletonBase` subclass instance. Raises: ValueError: If `nbjoints` does not match a registered skeleton. """ assets_folder = Path(assets_folder) if nbjoints == 34: return G1Skeleton34(assets_folder / "g1skel34") elif nbjoints == 22: return SMPLXSkeleton22(assets_folder / "smplx22") elif nbjoints == 30: return SOMASkeleton30(assets_folder / "somaskel30") elif nbjoints == 77: return SOMASkeleton77(assets_folder / "somaskel77") else: raise ValueError("This skeleton is not recognized.") ================================================ FILE: kimodo/skeleton/transforms.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Rotation-space conversion utilities for skeleton motion data.""" import einops import torch from ..tools import ensure_batched from .kinematics import batch_rigid_transform def global_rots_to_local_rots(global_joint_rots: torch.Tensor, skeleton): """Convert global rotations to local rotations using a skeleton hierarchy. Args: global_joint_rots: Global rotation matrices with shape `(..., J, 3, 3)`. skeleton: Skeleton object exposing `joint_parents` and `root_idx`. Returns: Local rotation matrices with the same leading shape as the input. 
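
    Example:
        A round-trip sketch; `skeleton`, `local_rots`, and `root_pos` are
        illustrative names, and the identity only holds for skeletons without
        baked rest rotations::

            global_rots, _, _ = skeleton.fk(local_rots, root_pos)
            local_again = global_rots_to_local_rots(global_rots, skeleton)
            # local_again matches local_rots up to numerical precision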
""" # Doing big batch global_joint_mats, ps = einops.pack( [global_joint_rots], "* nbjoints dim1 dim2", ) # obtain back the local rotations from the new global rotations parent_rot_mats = global_joint_mats[:, skeleton.joint_parents] parent_rot_mats[:, skeleton.root_idx] = torch.eye(3) # the root joint parent_rot_mats_inv = parent_rot_mats.transpose(2, 3) local_rot_mats = torch.einsum( "T N m n, T N n o -> T N m o", parent_rot_mats_inv, global_joint_mats, ) [local_rot_mats] = einops.unpack(local_rot_mats, ps, "* nbjoints dim1 dim2") return local_rot_mats @ensure_batched(local_rot_mats=4) def change_tpose(local_rot_mats: torch.Tensor, global_rot_offsets: torch.Tensor, skeleton): """Re-express local rotations in another t_pose based on the global rotation offsets. Args: local_rot_mats: Local rotation matrices with shape `(..., J, 3, 3)`. global_rot_offsets: Global rotation offsets with shape `(..., J, 3, 3)`. skeleton: Skeleton object exposing `joint_parents`, `root_idx`, and `nbjoints`. Returns: Tuple `(new_local_rot_mats, new_global_rot_mats)` in the standard frame. """ device, dtype = local_rot_mats.device, local_rot_mats.dtype global_rot_offsets = global_rot_offsets.to(device=device, dtype=dtype) root_idx = skeleton.root_idx joint_parents = skeleton.joint_parents # These are dummy joint positions, will not be used neutral_joints = torch.ones((len(local_rot_mats), skeleton.nbjoints, 3), device=device, dtype=dtype) # get the old joint rotations in the same global space as the t-pose # Note: the neutral joints we use here doesn't matter, because we are only using the global rotation outputs _, global_rot_mats = batch_rigid_transform(local_rot_mats, neutral_joints, joint_parents, root_idx) # (T, N, 3, 3) # compute the desired joint rotations in the frame of the new t-pose new_global_rot_mats = torch.einsum("T N m n, N o n -> T N m o", global_rot_mats, global_rot_offsets) # convert back to local rotations new_local_rot_mats = global_rots_to_local_rots(new_global_rot_mats, skeleton) return new_local_rot_mats, new_global_rot_mats @ensure_batched(local_rot_mats=4) def to_standard_tpose(local_rot_mats: torch.Tensor, skeleton): """Re-express local rotations in the skeleton's standard T-pose convention. Args: local_rot_mats: Local rotation matrices with shape `(..., J, 3, 3)`. skeleton: Skeleton object exposing `global_rot_offsets`, `joint_parents`, `root_idx`, and `nbjoints`. Returns: Tuple `(new_local_rot_mats, new_global_rot_mats)` in the standard frame. """ global_rot_offsets = skeleton.global_rot_offsets return change_tpose(local_rot_mats, global_rot_offsets, skeleton) @ensure_batched(local_rot_mats=4) def from_standard_tpose(local_rot_mats: torch.Tensor, skeleton): """Re-express local rotations from the skeleton's standard T-pose convention to the original formulation. Args: local_rot_mats: Local rotation matrices with shape `(..., J, 3, 3)`. skeleton: Skeleton object exposing `global_rot_offsets`, `joint_parents`, `root_idx`, and `nbjoints`. Returns: Tuple `(new_local_rot_mats, new_global_rot_mats)` in the standard frame. """ global_rot_offsets = skeleton.global_rot_offsets global_rot_offsets_T = global_rot_offsets.mT # do the inverse transform return change_tpose(local_rot_mats, global_rot_offsets_T, skeleton) ================================================ FILE: kimodo/tools.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0

"""Shared utilities: validation decorator, batching, JSON I/O, seeding, tensor conversion."""

import inspect
import json
import random
from collections.abc import Mapping
from functools import wraps
from math import prod
from pathlib import Path
from typing import Any, Callable, ParamSpec, TypeVar, Union

import numpy as np
import torch


def validate(validator, save_args: bool = False, super_init: bool = False):
    """Create a decorator function for validating user inputs.

    Args:
        validator: pydantic dataclass used to validate the keyword arguments
        save_args (bool): save all validated attributes to the obj [args[0]]
        super_init (bool): init parent with no arguments (useful for using save_args on a nn.Module)

    Returns:
        decorator: the decorator function
    """

    def decorator(func):
        @wraps(func)
        def validated_func(*args, **kwargs):
            conf = validator(**kwargs)
            if save_args:
                assert len(args) != 0
                obj = args[0]
                if super_init:
                    # init the parent module
                    super(type(obj), obj).__init__()
                for key, val in conf.__dict__.items():
                    setattr(obj, key, val)
            return func(*args, conf)

        return validated_func

    return decorator


# Type alias for clarity
Tensor = Any
P = ParamSpec("P")
R = TypeVar("R")


def ensure_batched(**spec: int) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """Decorator to flatten complex batch dimensions.

    Fixes included:
    1. Handles 1D tensors (tail_ndim=0) correctly without slicing errors.
    2. Skips .reshape() if the input is already purely flat (Optimization).
    """
    if not spec:
        raise ValueError("At least one argument spec must be provided.")

    def decorator(fn: Callable[P, R]) -> Callable[P, R]:
        sig = inspect.signature(fn)

        @wraps(fn)
        def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
            bound = sig.bind(*args, **kwargs)
            bound.apply_defaults()

            def _sequence_shape(name: str, value: Any) -> tuple[int, ...]:
                if not isinstance(value, (list, tuple)):
                    return ()
                if len(value) == 0:
                    return (0,)
                first_shape = _sequence_shape(name, value[0])
                for item in value[1:]:
                    item_shape = _sequence_shape(name, item)
                    if item_shape != first_shape:
                        raise ValueError(f"'{name}' must be a rectangular nested sequence, got ragged shape.")
                return (len(value), *first_shape)

            def _shape_and_ndim(name: str, value: Any) -> tuple[tuple[int, ...], int]:
                if hasattr(value, "shape") and hasattr(value, "ndim"):
                    shape = tuple(value.shape)
                    return shape, int(value.ndim)
                if isinstance(value, (list, tuple)):
                    shape = _sequence_shape(name, value)
                    return shape, len(shape)
                raise TypeError(f"'{name}' must be tensor-like or a nested list/tuple, got {type(value)}.")

            def _reshape_like(value: Any, shape: tuple[int, ...], name: str) -> Any:
                if hasattr(value, "reshape"):
                    return value.reshape(*shape)
                if not isinstance(value, (list, tuple)):
                    raise TypeError(f"Cannot reshape '{name}' of type {type(value)}.")
                flat: list[Any] = []

                def _flatten(x: Any) -> None:
                    if isinstance(x, (list, tuple)):
                        for item in x:
                            _flatten(item)
                    else:
                        flat.append(x)

                _flatten(value)
                expected_size = prod(shape) if shape else 1
                if len(flat) != expected_size:
                    raise ValueError(f"Cannot reshape '{name}' with {len(flat)} elements into shape {shape}.")

                def _build(index: int, dims: tuple[int, ...]) -> tuple[Any, int]:
                    if not dims:
                        return flat[index], index + 1
                    items = []
                    for _ in range(dims[0]):
                        item, index = _build(index, dims[1:])
                        items.append(item)
                    return items, index

                rebuilt, used = _build(0, shape)
                if used != len(flat):
                    raise ValueError(f"Internal reshape error for '{name}': used {used}/{len(flat)} elements.")
                if isinstance(value, tuple) and
isinstance(rebuilt, list): return tuple(rebuilt) return rebuilt # --- 1. CANONICAL ARGUMENT --- spec_items = list(spec.items()) canonical_name = None canonical_ndim = None x0 = None for name, ndim in spec_items: candidate = bound.arguments.get(name, None) if candidate is not None: canonical_name = name canonical_ndim = ndim x0 = candidate break if canonical_name is None: raise ValueError( "All canonical candidates are None: " + ", ".join(f"'{name}'" for name, _ in spec_items) ) # Calculate split between Batch dims and Feature dims expected_tail_dims = canonical_ndim - 1 # e.g. 3 - 1 = 2 (Sequence, Feat) x0_shape, x0_ndim = _shape_and_ndim(canonical_name, x0) # Validation if x0_ndim < expected_tail_dims: raise ValueError(f"'{canonical_name}' ndim={x0_ndim} < expected {expected_tail_dims} tail dims.") # --- LOGIC FIX 1: Handle 0 tail dims correctly --- if expected_tail_dims == 0: orig_batch_shape = x0_shape tail_shape = () else: orig_batch_shape = x0_shape[:-expected_tail_dims] tail_shape = x0_shape[-expected_tail_dims:] # Calculate flattened batch size # If orig_batch_shape is () (scalar input), size is 1. B_flat = prod(orig_batch_shape) if orig_batch_shape else 1 # Determine if we added a fake batch dim (unbatched input) is_unbatched_input = len(orig_batch_shape) == 0 # --- LOGIC FIX 2: Skip reshape if already flat (Optimization) --- # If batch shape is already 1D (e.g. [2]), we don't need to reshape [2, 140, 5] -> [2, 140, 5] is_already_flat = len(orig_batch_shape) == 1 if is_unbatched_input: # (H, W) -> (1, H, W) x0_batched = _reshape_like(x0, (1, *tail_shape), canonical_name) elif is_already_flat: # (B, H, W) -> Keep as is x0_batched = x0 else: # (B1, B2, H, W) -> (B1*B2, H, W) x0_batched = _reshape_like(x0, (B_flat, *tail_shape), canonical_name) bound.arguments[canonical_name] = x0_batched # --- 2. OTHER ARGUMENTS --- for name, target_ndim in spec_items: if name == canonical_name: continue val = bound.arguments.get(name, None) if val is None: continue arg_tail_dims = target_ndim - 1 # e.g. for lengths=1, tail=0 val_shape, val_ndim = _shape_and_ndim(name, val) # Validate if val_ndim < arg_tail_dims: raise ValueError(f"'{name}' ndim={val_ndim} too small.") # --- Get Batch Shape (With 0-tail fix) --- if arg_tail_dims == 0: val_batch_shape = val_shape val_tail_shape = () else: val_batch_shape = val_shape[:-arg_tail_dims] val_tail_shape = val_shape[-arg_tail_dims:] # --- Check Mismatch --- # Unbatched inputs must match unbatched canonical if len(val_batch_shape) == 0: if not is_unbatched_input: raise ValueError(f"'{name}' is unbatched but canonical is batched.") val_batched = _reshape_like(val, (1, *val_tail_shape), name) else: # Batched inputs must match canonical batch shape EXACTLY if val_batch_shape != orig_batch_shape: raise ValueError( f"Batch dimensions mismatch! '{canonical_name}' has {orig_batch_shape}, " f"but '{name}' has {val_batch_shape}." ) # Optimization: Don't reshape if already flat if is_already_flat: val_batched = val else: val_batched = _reshape_like(val, (B_flat, *val_tail_shape), name) bound.arguments[name] = val_batched # --- 3. EXECUTION --- out = fn(**bound.arguments) # --- 4. RESTORE --- def restore(obj): if isinstance(obj, Mapping): return {k: restore(v) for k, v in obj.items()} if isinstance(obj, (list, tuple)): return type(obj)(restore(x) for x in obj) if hasattr(obj, "shape"): if obj.ndim == 0: return obj # Verify batch dimension exists and wasn't reduced if obj.shape[0] != B_flat: return obj # If input was simple (B, ...), return simple (B, ...) 
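                    # The output still carries the flattened batch dim B_flat here;
                    # the branches below either return it as-is (input batch was
                    # already flat) or reshape back to the original batch shape,
                    # dropping the fake batch dim for unbatched inputs.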
if is_already_flat: return obj rest = obj.shape[1:] if is_unbatched_input: assert obj.shape[0] == 1, "The batch size should be 1 for unbatched." return obj[0] return obj.reshape(*orig_batch_shape, *rest) return obj return restore(out) return wrapper return decorator def to_numpy(obj): """Recursively convert tensors in dicts/lists/tuples to numpy arrays; leave other types unchanged.""" if isinstance(obj, Mapping): return {k: to_numpy(v) for k, v in obj.items()} if isinstance(obj, (list, tuple)): return type(obj)(to_numpy(x) for x in obj) if isinstance(obj, torch.Tensor): return obj.cpu().numpy() return obj def to_torch(obj, device=None, dtype=None): """Recursively convert numpy arrays in dicts/lists/tuples to torch tensors; optionally move to device/dtype.""" if isinstance(obj, Mapping): return {k: to_torch(v, device, dtype) for k, v in obj.items()} if isinstance(obj, (list, tuple)): return type(obj)(to_torch(x, device, dtype) for x in obj) if isinstance(obj, np.ndarray): obj = torch.from_numpy(obj) if isinstance(obj, torch.Tensor): if dtype is not None: obj = obj.to(dtype=dtype) if device is None: return obj return obj.to(device) return obj def seed_everything(seed: int, deterministic: bool = False) -> None: """Seed all random number generators.""" random.seed(seed) # for Python random module. np.random.seed(seed) # for NumPy. torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) if deterministic: torch.backends.cudnn.deterministic = True # for deterministic behavior. torch.backends.cudnn.benchmark = False # if you want to make the behavior deterministic. def load_json(path: Union[str, Path]) -> Any: """Load a JSON file and return its contents. Args: path (str | Path): Path to the JSON file. Returns: Any: Parsed JSON content (dict, list, etc.). Raises: FileNotFoundError: If the file does not exist. ValueError: If the file is not valid JSON. """ path = Path(path) if not path.exists(): raise FileNotFoundError(f"JSON file not found: {path}") try: with path.open("r", encoding="utf-8") as f: return json.load(f) except json.JSONDecodeError as e: raise ValueError(f"Invalid JSON in file {path}: {e}") from e def save_json(path: Union[str, Path], data: Any) -> None: """Save data to a JSON file. Args: path (str | Path): Path to the JSON file. data (Any): Data to save (must be JSON serializable). Raises: ValueError: If the data is not JSON serializable. """ path = Path(path) # Create parent directories if they don't exist path.parent.mkdir(parents=True, exist_ok=True) try: with path.open("w", encoding="utf-8") as f: json.dump(data, f, indent=2, ensure_ascii=False) except (TypeError, ValueError) as e: raise ValueError(f"Data is not JSON serializable: {e}") from e ================================================ FILE: kimodo/viz/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Viser-based 3D visualization for skeletons and motion.""" from . 
import viser_utils from .viser_utils import ( Character, CharacterMotion, ConstraintSet, EEJointsKeyframeSet, FullbodyKeyframeSet, GuiElements, RootKeyframe2DSet, SkeletonMesh, WaypointMesh, load_example_cases, ) __all__ = [ "Character", "CharacterMotion", "ConstraintSet", "EEJointsKeyframeSet", "FullbodyKeyframeSet", "GuiElements", "RootKeyframe2DSet", "SkeletonMesh", "WaypointMesh", "load_example_cases", "viser_utils", ] ================================================ FILE: kimodo/viz/constraint_ui.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Constraint visualization and frame indexing for the viz UI.""" from typing import List, Optional import numpy as np import torch import viser import viser.transforms as tf from kimodo.motion_rep.smooth_root import get_smooth_root_pos from kimodo.skeleton import SkeletonBase from kimodo.tools import to_numpy, to_torch from .scene import SkeletonMesh, WaypointMesh def update_interval(interval_start, interval_end, start_frame_idx, end_frame_idx): """Updates an interval after removing the range from start_frame_idx to end_frame_idx.""" # Calculate new range after removing [start_frame_idx, end_frame_idx] # Case 1: Removal fully contains the interval -> delete entirely if start_frame_idx <= interval_start and end_frame_idx >= interval_end: return None, None # Already removed, don't recreate # Case 2: Removal is at the start of interval -> shrink from start elif start_frame_idx <= interval_start and end_frame_idx < interval_end: new_start = end_frame_idx + 1 new_end = interval_end # Case 3: Removal is at the end of interval -> shrink from end elif start_frame_idx > interval_start and end_frame_idx >= interval_end: new_start = interval_start new_end = start_frame_idx - 1 # Case 4: Removal is in the middle -> keep the larger portion else: # start_frame_idx > interval_start and end_frame_idx < interval_end left_size = start_frame_idx - interval_start right_size = interval_end - end_frame_idx if left_size >= right_size: new_start = interval_start new_end = start_frame_idx - 1 else: new_start = end_frame_idx + 1 new_end = interval_end return new_start, new_end class ConstraintSet: def __init__( self, name: str, server: viser.ViserServer, skeleton: SkeletonBase, display_name: Optional[str] = None, ): self.name = name self.server = server self.skeleton = skeleton self.display_name = display_name if display_name is not None else name self.keyframes = dict() # frame_idx -> poses self.frame2keyid = dict() # frame_idx -> list of keyframe ids at this frame self.scene_elements = dict() # frame_idx -> meshes, labels, etc. self.interval_labels = dict() # (start_frame_idx, end_frame_idx) -> interval_label self.labels_visible = True def set_label_visibility(self, visible: bool) -> None: """Show or hide constraint labels without deleting them.""" self.labels_visible = visible for scene_data in self.scene_elements.values(): label = scene_data.get("label") if label is not None: label.visible = visible for interval_label in self.interval_labels.values(): interval_label.visible = visible def set_overlay_visibility(self, only_frame: Optional[int] = None) -> None: """Show all overlay elements, or only those at the given frame. Args: only_frame: If None, show all overlays. If int, show only overlays at that frame. 
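
        Example:
            A typical call from UI code; `constraint_set` and `frame_slider`
            are illustrative names::

                constraint_set.set_overlay_visibility(only_frame=frame_slider.value)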
""" raise NotImplementedError("Subclasses must implement this method") def add_keyframe(self, keyframe_id: str, frame_idx: int, pose_data: torch.Tensor): """Adds a single keyframe at the given frame with the given pose data. Args: keyframe_id: str, id for the keyframe. Must be unique within the given frame_idx. frame_idx: int, frame index to add the keyframe at pose_data: torch.Tensor, e.g. full-body pose, EE pose, 2D root pose, etc. """ raise NotImplementedError("Subclasses must implement this method") def add_interval( self, interval_id: str, start_frame_idx: int, end_frame_idx: int, pose_seq_data: torch.Tensor, ): """Adds a keyframe interval between the given start and end frames with the given pose data. Args: interval_id: str, id for the interval. Must be unique within the given start_frame_idx and end_frame_idx. start_frame_idx: int, start frame index of the interval end_frame_idx: int, end frame index of the interval pose_seq_data: torch.Tensor, data for constrained interval, e.g. full-body poses, EE poses, 2D root poses, etc. """ raise NotImplementedError("Subclasses must implement this method") def _add_interval_label(self, start_frame_idx: int, end_frame_idx: int): """ Adds an interval label between the given start and end frames Args: start_frame_idx: int, start frame index of the interval end_frame_idx: int, end frame index of the interval """ mid = int((start_frame_idx + end_frame_idx) / 2) interval_label_pos = self._get_label_pos(mid) interval_label = self.server.scene.add_label( name=f"/{self.name}/interval_label_{start_frame_idx}_{end_frame_idx}", text=f"{self.display_name} @ [{start_frame_idx}, {end_frame_idx}]", position=interval_label_pos, font_size_mode="screen", font_screen_scale=0.7, anchor="center-center", ) interval_label.visible = self.labels_visible self.interval_labels[(start_frame_idx, end_frame_idx)] = interval_label def remove_keyframe(self, keyframe_id: str, frame_idx: int): """ Removes a keyframe at the given frame Args: keyframe_id: str, id for the keyframe to remove frame_idx: int, frame index to remove the keyframe at """ raise NotImplementedError("Subclasses must implement this method") def remove_interval(self, interval_id: str, start_frame_idx: int, end_frame_idx: int): """ Removes an interval between the given start and end frames Args: interval_id: str, id for the interval to remove start_frame_idx: int, start frame index of the interval end_frame_idx: int, end frame index of the interval """ raise NotImplementedError("Subclasses must implement this method") def _get_label_pos(self, frame_idx: int): """ Returns the position of where to place the displayed label for the given frame index Args: frame_idx: int, frame index to get the label position for """ raise NotImplementedError("Subclasses must implement this method") def _remove_interval_and_update_label(self, interval_id: str, start_frame_idx: int, end_frame_idx: int): """ Removes an interval between the given start and end frames and updates the label Args: start_frame_idx: int, start frame index of the interval end_frame_idx: int, end frame index of the interval """ for frame_idx in range(start_frame_idx, end_frame_idx + 1): self.remove_keyframe(interval_id, frame_idx) # Update interval labels that overlap with the removed range intervals_to_update = [] for (interval_start, interval_end), label in list(self.interval_labels.items()): # Check if intervals overlap if interval_start <= end_frame_idx and interval_end >= start_frame_idx: intervals_to_update.append((interval_start, interval_end, 
                    label))

        for interval_start, interval_end, label in intervals_to_update:
            # Remove old label from scene and dict
            self.server.scene.remove_by_name(label.name)
            del self.interval_labels[(interval_start, interval_end)]

            new_start, new_end = update_interval(interval_start, interval_end, start_frame_idx, end_frame_idx)
            if new_start is None or new_end is None:
                continue

            # Create updated label with new range
            if new_start <= new_end:
                # Position label at midpoint - these keyframes are guaranteed to exist
                # since the new range is outside the removal range
                mid_frame = (new_start + new_end) // 2
                label_pos = self._get_label_pos(mid_frame)
                new_label = self.server.scene.add_label(
                    name=f"/{self.name}/interval_label_{new_start}_{new_end}",
                    text=f"{self.display_name} @ [{new_start}, {new_end}]",
                    position=label_pos,
                    font_size_mode="screen",
                    font_screen_scale=0.7,
                    anchor="center-center",
                )
                new_label.visible = self.labels_visible
                self.interval_labels[(new_start, new_end)] = new_label

    def get_constraint_info(self, device: Optional[str] = None):
        """Returns constraint information for generation (torch) or UI (numpy)."""
        raise NotImplementedError("Subclasses must implement this method")

    def get_frame_idx(self):
        """Returns all constrained frame indices in the set."""
        return list(self.keyframes.keys())

    def clear(self, frame_idx: Optional[int] = None):
        """
        Clears all keyframes and intervals from the constraint set

        Args:
            frame_idx: int, single frame index to clear if given
        """
        raise NotImplementedError("Subclasses must implement this method")


def build_constraint_set_table_markdown(constraint_list: List[ConstraintSet]):
    markdown = "| Track | Frame Num |\n"
    markdown += "|------|----------|\n"
    # List each constraint together with its sorted frame indices
    for constraint in constraint_list:
        frame_info = constraint.get_frame_idx()
        if len(frame_info) > 0:
            frame_info = ", ".join([str(frame) for frame in sorted(frame_info)])
        else:
            frame_info = "-"
        markdown += f"| {constraint.display_name} | {frame_info} |\n"
    return markdown


class FullbodyKeyframeSet(ConstraintSet):
    def __init__(
        self,
        name: str,
        server: viser.ViserServer,
        skeleton: SkeletonBase,
        display_name: Optional[str] = None,
    ):
        super().__init__(name, server, skeleton, display_name=display_name)

    def add_keyframe(
        self,
        keyframe_id: str,
        frame_idx: int,
        joints_pos: torch.Tensor | np.ndarray,
        joints_rot: torch.Tensor | np.ndarray,
        viz_label: bool = True,
        exists_ok: bool = False,
    ):
        """Adds a single full-body keyframe at the given frame or updates the existing one at this frame.

        Note if a keyframe already exists at this frame, it will be updated to the given pose.

        Args:
            keyframe_id: str, id for the keyframe. Must be unique within the given frame_idx.
frame_idx: int, frame index to add the keyframe at joints_pos: torch.Tensor, [J, 3] joints positions to add the keyframe at """ # create/update scene elements if frame_idx in self.keyframes: skeleton_mesh = self.scene_elements[frame_idx]["skeleton_mesh"] skeleton_mesh.set_pose(to_torch(joints_pos)) if viz_label and "label" in self.scene_elements[frame_idx]: label = self.scene_elements[frame_idx]["label"] label.position = to_numpy(joints_pos)[self.skeleton.root_idx] label.visible = self.labels_visible else: # create skeleton to visualize the full-body constraint skeleton_mesh = SkeletonMesh( f"/{self.name}/skeleton_{frame_idx}", self.server, self.skeleton, joint_color=(255, 235, 0), bone_color=(255, 0, 0), starting_joints_pos=to_torch(joints_pos), ) self.scene_elements[frame_idx] = { "skeleton_mesh": skeleton_mesh, } if viz_label: label = self.server.scene.add_label( name=f"/{self.name}/label_{frame_idx}", text=f"{self.display_name} @ {frame_idx}", position=to_numpy(joints_pos)[self.skeleton.root_idx], font_size_mode="screen", font_screen_scale=0.7, anchor="center-center", ) label.visible = self.labels_visible self.scene_elements[frame_idx]["label"] = label # set/update data self.keyframes[frame_idx] = { "joints_pos": to_numpy(joints_pos), "joints_rot": to_numpy(joints_rot), } if frame_idx not in self.frame2keyid: self.frame2keyid[frame_idx] = [] if keyframe_id in self.frame2keyid[frame_idx]: if not exists_ok: raise AssertionError("keyframe_id already exists in this frame!") else: self.frame2keyid[frame_idx].append(keyframe_id) def add_interval( self, interval_id: str, start_frame_idx: int, end_frame_idx: int, joints_pos: torch.Tensor, joints_rot: torch.Tensor, ): """Adds a full-body keyframe interval between the given start and end frames. Args: start_frame_idx: int, start frame index of the interval end_frame_idx: int, end frame index of the interval joints_pos: torch.Tensor, [T, J, 3] joints positions within the interval """ assert joints_pos.shape[0] == end_frame_idx - start_frame_idx + 1 for frame_idx in range(start_frame_idx, end_frame_idx + 1): rel_idx = frame_idx - start_frame_idx self.add_keyframe( interval_id, frame_idx, joints_pos[rel_idx], joints_rot[rel_idx], viz_label=False, ) # add separate interval label self._add_interval_label(start_frame_idx, end_frame_idx) def remove_keyframe(self, keyframe_id: str, frame_idx: int): if frame_idx not in self.keyframes: return if keyframe_id not in self.frame2keyid[frame_idx]: return self.frame2keyid[frame_idx].remove(keyframe_id) if len(self.frame2keyid[frame_idx]) == 0: del self.frame2keyid[frame_idx] self.clear(frame_idx) def _get_label_pos(self, frame_idx: int): return self.keyframes[frame_idx]["joints_pos"][self.skeleton.root_idx] def remove_interval(self, interval_id: str, start_frame_idx: int, end_frame_idx: int): self._remove_interval_and_update_label(interval_id, start_frame_idx, end_frame_idx) def get_constraint_info(self, device: Optional[str] = None): all_joints_pos = [] all_joints_rot = [] for v in self.keyframes.values(): joints_pos = to_torch(v["joints_pos"], device=device) joints_rot = to_torch(v["joints_rot"], device=device) if len(joints_pos.shape) == 2: all_joints_pos.append(joints_pos[None]) else: all_joints_pos.append(joints_pos) if len(joints_rot.shape) == 3: all_joints_rot.append(joints_rot[None]) else: all_joints_rot.append(joints_rot) all_joints_pos = torch.cat(all_joints_pos, dim=0) if len(all_joints_pos) > 0 else None all_joints_rot = torch.cat(all_joints_rot, dim=0) if len(all_joints_rot) > 0 else None return { 
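            # Keyframes stacked along dim 0 (K constrained frames):
            # joints_pos [K, J, 3], joints_rot [K, J, 3, 3], or None when empty.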
"frame_idx": self.get_frame_idx(), "joints_pos": all_joints_pos, "joints_rot": all_joints_rot, } def clear(self, frame_idx: Optional[int] = None): frame_idx_list = list(self.keyframes.keys()) if frame_idx is None else [frame_idx] for fidx in frame_idx_list: self.scene_elements[fidx]["skeleton_mesh"].clear() if "ee_rotation_axes" in self.scene_elements[fidx]: self.server.scene.remove_by_name(self.scene_elements[fidx]["ee_rotation_axes"].name) if "label" in self.scene_elements[fidx]: self.server.scene.remove_by_name(self.scene_elements[fidx]["label"].name) self.keyframes.pop(fidx) self.scene_elements.pop(fidx) self.frame2keyid.pop(fidx, None) if frame_idx is None: # clear all interval labels if clearing all keyframes for interval_label in list(self.interval_labels.values()): self.server.scene.remove_by_name(interval_label.name) self.interval_labels.clear() self.frame2keyid.clear() def set_overlay_visibility(self, only_frame: Optional[int] = None) -> None: show_all = only_frame is None for fidx, scene_data in self.scene_elements.items(): visible = show_all or fidx == only_frame scene_data["skeleton_mesh"].set_visibility(visible) label = scene_data.get("label") if label is not None: label.visible = visible and self.labels_visible for interval_label in self.interval_labels.values(): interval_label.visible = show_all and self.labels_visible class EEJointsKeyframeSet(ConstraintSet): def __init__( self, name: str, server: viser.ViserServer, skeleton: SkeletonBase, display_name: Optional[str] = None, ): super().__init__(name, server, skeleton, display_name=display_name) # frame_idx -> list of (keyframe_id, joint_names) at this frame self.frame2keyid = dict() def create_scene_elements( self, frame_idx: int, joints_pos: torch.Tensor | np.ndarray, joints_rot: Optional[torch.Tensor | np.ndarray], joint_names: List[str], viz_label: bool = True, ): # create skeleton to visualize the full-body constraint ee_joint_indices = [] ee_gizmo_indices = [] constrained_bone_idx = [] for joint_name in joint_names: if joint_name == "Hips": continue elif joint_name in ["LeftHand", "RightHand", "LeftFoot", "RightFoot"]: expanded_joint_names = { "LeftHand": self.skeleton.left_hand_joint_names, "RightHand": self.skeleton.right_hand_joint_names, "LeftFoot": self.skeleton.left_foot_joint_names, "RightFoot": self.skeleton.right_foot_joint_names, }[joint_name] ee_joint_indices.extend([self.skeleton.bone_order_names_index[joint] for joint in expanded_joint_names]) if len(expanded_joint_names) > 1: ee_gizmo_indices.extend( [self.skeleton.bone_order_names_index[joint] for joint in expanded_joint_names[:1]] ) constrained_bone_idx.extend( [self.skeleton.bone_order_names_index[joint] - 1 for joint in expanded_joint_names[1:]] ) else: raise ValueError(f"Invalid joint name: {joint_name}") # de-duplicate while preserving order ee_joint_indices = list(dict.fromkeys(ee_joint_indices)) ee_gizmo_indices = list(dict.fromkeys(ee_gizmo_indices)) constrained_bone_idx = list(dict.fromkeys(constrained_bone_idx)) constrained_idx = [self.skeleton.root_idx] + ee_joint_indices constrained_idx = np.array(constrained_idx) constrained_bone_idx = np.array(constrained_bone_idx) # create skeleton to visualize the full-body constraint joint_color = np.full((self.skeleton.nbjoints, 3), (220, 220, 220)) bone_color = np.full((self.skeleton.nbjoints - 1, 3), (220, 220, 220)) # color constrained joints differently joint_color[constrained_idx] = (255, 0, 0) bone_color[constrained_bone_idx] = (255, 0, 0) skeleton_mesh = SkeletonMesh( 
f"/{self.name}/skeleton_{frame_idx}", self.server, self.skeleton, joint_color=joint_color, bone_color=bone_color, starting_joints_pos=to_torch(joints_pos), ) self.scene_elements[frame_idx] = { "skeleton_mesh": skeleton_mesh, } joints_pos_np = to_numpy(joints_pos) joints_rot_np = to_numpy(joints_rot) if joints_rot is not None else None if joints_rot_np is not None and len(ee_gizmo_indices) > 0: ee_axes = self.server.scene.add_batched_axes( f"/{self.name}/ee_rot_axes_{frame_idx}", batched_wxyzs=tf.SO3.from_matrix(joints_rot_np[ee_gizmo_indices]).wxyz, batched_positions=joints_pos_np[ee_gizmo_indices], axes_length=0.07, axes_radius=0.007, ) self.scene_elements[frame_idx]["ee_rotation_axes"] = ee_axes if viz_label: label = self.server.scene.add_label( name=f"/{self.name}/label_{frame_idx}", text=f"{self.display_name} @ {frame_idx}", position=joints_pos_np[self.skeleton.root_idx] + np.array([0.0, 0.05, 0.0]), font_size_mode="screen", font_screen_scale=0.7, anchor="bottom-center", ) label.visible = self.labels_visible self.scene_elements[frame_idx]["label"] = label def add_keyframe( self, keyframe_id: str, frame_idx: int, joints_pos: torch.Tensor | np.ndarray, joints_rot: torch.Tensor | np.ndarray, joint_names: List[str], end_effector_type: str, viz_label: bool = True, exists_ok: bool = False, ): """Adds a single EE keyframe at the given frame or updates the existing one at this frame. Args: keyframe_id: str, id for the keyframe. Must be unique within the given frame_idx. frame_idx: int, frame index to add the keyframe at joints_pos: torch.Tensor, [J, 3] joints positions to add the keyframe at joints_rot: torch.Tensor, [J, 3, 3] joints rotation matrices to add the keyframe at joint_names: List[str], names of the joints to add the keyframe at """ need_create_viz = True joint_names_input = joint_names if not isinstance(end_effector_type, set): end_effector_type = set([end_effector_type]) # create/update scene elements if frame_idx in self.keyframes: if joint_names != self.keyframes[frame_idx]["joint_names"]: # merge together with existing constraint if needed joint_names = set(joint_names) joint_names.update(set(self.keyframes[frame_idx]["joint_names"])) joint_names = list(joint_names) end_effector_type.update(self.keyframes[frame_idx]["end_effector_type"]) # need to re-create viz elements self.clear(frame_idx) else: need_create_viz = False # overwrite the pose with the latest one skeleton_mesh = self.scene_elements[frame_idx]["skeleton_mesh"] skeleton_mesh.set_pose(to_torch(joints_pos)) if "ee_rotation_axes" in self.scene_elements[frame_idx]: ee_gizmo_indices = [] for joint_name in joint_names: if joint_name == "Hips": continue elif joint_name in [ "LeftHand", "RightHand", "LeftFoot", "RightFoot", ]: expanded_joint_names = { "LeftHand": self.skeleton.left_hand_joint_names, "RightHand": self.skeleton.right_hand_joint_names, "LeftFoot": self.skeleton.left_foot_joint_names, "RightFoot": self.skeleton.right_foot_joint_names, }[joint_name] if len(expanded_joint_names) > 0: ee_gizmo_indices.extend( [self.skeleton.bone_order_names_index[joint] for joint in expanded_joint_names[:1]] # take only the base joint of the end effector (to avoid clutter) ) else: raise ValueError(f"Invalid joint name: {joint_name}") ee_gizmo_indices = list(dict.fromkeys(ee_gizmo_indices)) if len(ee_gizmo_indices) > 0: ee_axes = self.scene_elements[frame_idx]["ee_rotation_axes"] joints_pos_np = to_numpy(joints_pos) joints_rot_np = to_numpy(joints_rot) ee_axes.batched_positions = joints_pos_np[ee_gizmo_indices] 
ee_axes.batched_wxyzs = tf.SO3.from_matrix(joints_rot_np[ee_gizmo_indices]).wxyz if viz_label and "label" in self.scene_elements[frame_idx]: label = self.scene_elements[frame_idx]["label"] label.position = to_numpy(joints_pos)[self.skeleton.root_idx] label.visible = self.labels_visible if need_create_viz: self.create_scene_elements(frame_idx, joints_pos, joints_rot, joint_names, viz_label=viz_label) # set/update data self.keyframes[frame_idx] = { "joints_pos": to_numpy(joints_pos), "joints_rot": to_numpy(joints_rot), "joint_names": joint_names, "end_effector_type": end_effector_type, } if frame_idx not in self.frame2keyid: self.frame2keyid[frame_idx] = [] known_keyframe_ids = {k: idx for idx, (k, _) in enumerate(self.frame2keyid[frame_idx])} if keyframe_id in known_keyframe_ids.keys(): if not exists_ok: raise AssertionError("keyframe_id already exists in this frame!") idx = known_keyframe_ids[keyframe_id] # override previous exisiting keyframe self.frame2keyid[frame_idx][idx] = (keyframe_id, joint_names_input) else: # track which subset of joints are constrained by this keyframe_id self.frame2keyid[frame_idx].append((keyframe_id, joint_names_input)) def add_interval( self, interval_id: str, start_frame_idx: int, end_frame_idx: int, joints_pos: torch.Tensor | np.ndarray, joints_rot: torch.Tensor | np.ndarray, joint_names: List[str], end_effector_type: str, ): """Adds an interval of EE keyframes at the given frame or updates the existing one at this frame. Args: interval_id: str, id for the interval. Must be unique within the given start_frame_idx and end_frame_idx. start_frame_idx: int, start frame index to add the interval at end_frame_idx: int, end frame index to add the interval at joints_pos: torch.Tensor, [T, J, 3] joints positions to add the interval at joints_rot: torch.Tensor, [T, J, 3, 3] joints rotation matrices to add the interval at joint_names: List[str], names of the joints to add for the entire interval """ num_frames = end_frame_idx - start_frame_idx + 1 joints_pos_np = to_numpy(joints_pos) joints_rot_np = to_numpy(joints_rot) assert joints_pos_np.shape[0] == num_frames assert joints_rot_np.shape[0] == num_frames for frame_idx in range(start_frame_idx, end_frame_idx + 1): rel_idx = frame_idx - start_frame_idx self.add_keyframe( interval_id, frame_idx, joints_pos_np[rel_idx], joints_rot_np[rel_idx], joint_names, end_effector_type, viz_label=False, ) self._add_interval_label(start_frame_idx, end_frame_idx) def remove_keyframe(self, keyframe_id: str, frame_idx: int): """Removes a keyframe at the given frame or updates the existing one at this frame by removing the specified joints. Args: keyframe_id: str, id for the keyframe to remove. This determines which joints to remove. 
frame_idx: int, frame index to remove the keyframe at """ if frame_idx not in self.keyframes: return remaining_joint_names = set() delete_idx = None for i, (keyid, joint_names) in enumerate(self.frame2keyid[frame_idx]): if keyid == keyframe_id: delete_idx = i else: remaining_joint_names.update(joint_names) if delete_idx is None: # this keyframe_id is not in the specified frame return self.frame2keyid[frame_idx].pop(delete_idx) if len(remaining_joint_names) == 0: # no more keyframes in this frame, clear the frame del self.frame2keyid[frame_idx] self.clear(frame_idx) return # only deleting part of keyframe (potentially some subset of joints) # delete the old visualization and add a new one with the updated joint set new_joint_names = list(remaining_joint_names) self.clear(frame_idx, scene_elements_only=True) joints_pos = self.keyframes[frame_idx]["joints_pos"] joints_rot = self.keyframes[frame_idx]["joints_rot"] self.create_scene_elements(frame_idx, joints_pos, joints_rot, new_joint_names) self.keyframes[frame_idx]["joint_names"] = new_joint_names def _get_label_pos(self, frame_idx: int): return self.keyframes[frame_idx]["joints_pos"][self.skeleton.root_idx] def remove_interval(self, interval_id: str, start_frame_idx: int, end_frame_idx: int): self._remove_interval_and_update_label(interval_id, start_frame_idx, end_frame_idx) def get_constraint_info(self, device: Optional[str] = None): all_joints_pos = [] all_joints_rot = [] all_joints_names = [] all_end_effector_type = [] for v in self.keyframes.values(): joints_pos = to_torch(v["joints_pos"], device=device) joints_rot = to_torch(v["joints_rot"], device=device) if len(joints_pos.shape) == 2: all_joints_pos.append(joints_pos[None]) else: all_joints_pos.append(joints_pos) if len(joints_rot.shape) == 3: all_joints_rot.append(joints_rot[None]) else: all_joints_rot.append(joints_rot) all_joints_names.append(v["joint_names"]) all_end_effector_type.append(v["end_effector_type"]) all_joints_pos = torch.cat(all_joints_pos, dim=0) if len(all_joints_pos) > 0 else None all_joints_rot = torch.cat(all_joints_rot, dim=0) if len(all_joints_rot) > 0 else None return { "frame_idx": self.get_frame_idx(), "joints_pos": all_joints_pos, "joints_rot": all_joints_rot, "joint_names": all_joints_names, "end_effector_type": all_end_effector_type, } def clear(self, frame_idx: Optional[int] = None, scene_elements_only: bool = False): frame_idx_list = list(self.keyframes.keys()) if frame_idx is None else [frame_idx] for fidx in frame_idx_list: self.scene_elements[fidx]["skeleton_mesh"].clear() if "ee_rotation_axes" in self.scene_elements[fidx]: self.server.scene.remove_by_name(self.scene_elements[fidx]["ee_rotation_axes"].name) if "label" in self.scene_elements[fidx]: self.server.scene.remove_by_name(self.scene_elements[fidx]["label"].name) self.scene_elements.pop(fidx) if not scene_elements_only: self.keyframes.pop(fidx) if frame_idx is None: # clear all interval labels if clearing all keyframes for interval_label in list(self.interval_labels.values()): self.server.scene.remove_by_name(interval_label.name) self.interval_labels.clear() def set_overlay_visibility(self, only_frame: Optional[int] = None) -> None: show_all = only_frame is None for fidx, scene_data in self.scene_elements.items(): visible = show_all or fidx == only_frame scene_data["skeleton_mesh"].set_visibility(visible) if "ee_rotation_axes" in scene_data: scene_data["ee_rotation_axes"].visible = visible label = scene_data.get("label") if label is not None: label.visible = visible and self.labels_visible 
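
# Usage sketch (hedged): pinning both hands over a frame range with the
# EEJointsKeyframeSet above. `ee_set` is a hypothetical instance; `pos`/`rot`
# follow the add_interval docstring shapes ([T, J, 3] and [T, J, 3, 3]), and
# the "position" value for end_effector_type is illustrative only.
def _example_ee_interval(ee_set, pos, rot):
    ee_set.add_interval(
        "iv-0",
        start_frame_idx=10,
        end_frame_idx=20,
        joints_pos=pos,
        joints_rot=rot,
        joint_names=["LeftHand", "RightHand"],
        end_effector_type="position",
    )
    return ee_set.get_constraint_info(device="cpu")
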

class RootKeyframe2DSet(ConstraintSet):
    def __init__(
        self,
        name: str,
        server: viser.ViserServer,
        skeleton: SkeletonBase,
        display_name: Optional[str] = None,
    ):
        super().__init__(name, server, skeleton, display_name=display_name)
        self.dense_path = False
        self.smooth_path = True
        self.line_segments = None  # visualization of dense path
        self.interval_line_segments = {}

    def add_keyframe(
        self,
        keyframe_id: str,
        frame_idx: int,
        root_pos: torch.Tensor | np.ndarray,
        viz_label: bool = True,
        update_path: bool = True,
        viz_waypoint: bool = True,
        exists_ok: bool = False,
    ):
        """Adds a single 2D root keyframe at the given frame or updates the existing one at this frame.

        Args:
            keyframe_id: str, id for the keyframe. Must be unique within the given frame_idx.
            frame_idx: int, frame index to add the keyframe at
            root_pos: torch.Tensor, [3] root position to add the keyframe at, y entry (index 1) should be 0
            viz_label: bool, whether to visualize the label for the keyframe
        """
        root_pos_np = to_numpy(root_pos)
        if frame_idx not in self.scene_elements:
            self.scene_elements[frame_idx] = {}
        scene_data = self.scene_elements[frame_idx]
        if frame_idx in self.keyframes:
            waypoint = scene_data.get("waypoint")
            if waypoint is not None:
                waypoint.update_position(root_pos_np)
            elif viz_waypoint:
                waypoint = WaypointMesh(
                    f"/{self.name}/waypoint_{frame_idx}",
                    self.server,
                    position=root_pos_np,
                )
                scene_data["waypoint"] = waypoint
            label = scene_data.get("label")
            if viz_label and label is not None:
                label.position = root_pos_np
                label.visible = self.labels_visible
            elif viz_label and label is None:
                label = self.server.scene.add_label(
                    name=f"/{self.name}/label_{frame_idx}",
                    text=f"{self.display_name} @ {frame_idx}",
                    position=root_pos_np,
                    font_size_mode="screen",
                    font_screen_scale=0.7,
                    anchor="bottom-left",
                )
                label.visible = self.labels_visible
                scene_data["label"] = label
        else:
            if viz_waypoint:
                waypoint = WaypointMesh(
                    f"/{self.name}/waypoint_{frame_idx}",
                    self.server,
                    position=root_pos_np,
                )
                scene_data["waypoint"] = waypoint
            if viz_label:
                label = self.server.scene.add_label(
                    name=f"/{self.name}/label_{frame_idx}",
                    text=f"{self.display_name} @ {frame_idx}",
                    position=root_pos_np,
                    font_size_mode="screen",
                    font_screen_scale=0.7,
                    anchor="bottom-left",
                )
                label.visible = self.labels_visible
                scene_data["label"] = label

        # set/update data
        self.keyframes[frame_idx] = root_pos_np
        if frame_idx not in self.frame2keyid:
            self.frame2keyid[frame_idx] = []
        if keyframe_id in self.frame2keyid[frame_idx]:
            if not exists_ok:
                raise AssertionError("keyframe_id already exists in this frame!")
        else:
            self.frame2keyid[frame_idx].append(keyframe_id)
        # need to update path visualization
        if self.line_segments is not None and update_path:
            self.update_line_segments()

    def add_interval(
        self,
        interval_id: str,
        start_frame_idx: int,
        end_frame_idx: int,
        root_pos: torch.Tensor | np.ndarray,
    ):
        """Adds an interval of 2D root keyframes between the given start and end frames.

        Args:
            interval_id: str, id for the interval. Must be unique within the given start_frame_idx and end_frame_idx.
            start_frame_idx: int, start frame index to add the interval at
            end_frame_idx: int, end frame index to add the interval at
            root_pos: torch.Tensor, [T, 3] root positions to add the interval at
        """
        root_pos_np = to_numpy(root_pos)
        assert root_pos_np.shape[0] == end_frame_idx - start_frame_idx + 1
        if root_pos_np.shape[0] >= 2:
            points = np.zeros((root_pos_np.shape[0] - 1, 2, 3))
            points[:, 0] = root_pos_np[:-1]
            points[:, 1] = root_pos_np[1:]
            if interval_id in self.interval_line_segments:
                self.server.scene.remove_by_name(self.interval_line_segments[interval_id].name)
            self.interval_line_segments[interval_id] = self.server.scene.add_line_segments(
                name=f"/{self.name}/interval_{interval_id}_line",
                points=points,
                colors=(255, 0, 0),
                line_width=5.0,
            )
        for frame_idx in range(start_frame_idx, end_frame_idx + 1):
            rel_idx = frame_idx - start_frame_idx
            self.add_keyframe(
                interval_id,
                frame_idx,
                root_pos_np[rel_idx],
                viz_label=False,
                update_path=False,
                viz_waypoint=False,
            )
        self._add_interval_label(start_frame_idx, end_frame_idx)
        if self.line_segments is not None:
            self.update_line_segments()

    def set_smooth_path(self, smooth_path: bool):
        self.smooth_path = smooth_path
        if self.line_segments is not None:
            self.update_line_segments()

    def set_dense_path(self, dense_path: bool):
        """If dense_path is True, will make the path dense by interpolating between added keyframes.

        Args:
            dense_path: bool, whether to make the path dense
        """
        self.dense_path = dense_path
        if self.dense_path:
            # visualize dense path with line segments
            self.line_segments = self.server.scene.add_line_segments(
                name=f"/{self.name}/line_segments",
                points=np.zeros((1, 2, 3)),
                colors=(255, 0, 0),
                line_width=5.0,
            )
            self.update_line_segments()
        else:
            if self.line_segments is not None:
                self.server.scene.remove_by_name(self.line_segments.name)
                self.line_segments = None

    def interpolate_path(self, t: np.ndarray):
        """Interpolates the path between the given frame indices.

        Args:
            t: np.ndarray, frame indices to interpolate at
        """
        from scipy.interpolate import interp1d

        cur_info = self._get_sparse_constraint_info()
        frame_idx = cur_info["frame_idx"]
        all_root_pos = cur_info["root_pos"]
        x = all_root_pos[:, 0]
        z = all_root_pos[:, 2]
        kind = "linear"
        # if self.smooth_path and len(frame_idx) >= 3:
        #     kind = "quadratic"
        interp_x = interp1d(frame_idx, x, kind=kind)
        interp_z = interp1d(frame_idx, z, kind=kind)
        x_new = interp_x(t)
        z_new = interp_z(t)
        path3d = np.stack([x_new, np.zeros_like(x_new), z_new], axis=1)
        if self.smooth_path and len(frame_idx) >= 3:
            path3d = get_smooth_root_pos(torch.from_numpy(path3d[None]))[0].numpy()
        return path3d

    def update_line_segments(self):
        if len(self.keyframes) < 2:
            return
        t = np.array(sorted(self.get_frame_idx()))
        if self.smooth_path:
            # more points for smoothed curve
            t = np.linspace(t[0], t[-1], 100)
        path3d = self.interpolate_path(t)
        points = np.zeros((len(path3d) - 1, 2, 3))
        points[:, 0] = path3d[:-1]
        points[:, 1] = path3d[1:]
        self.line_segments.points = points

    def remove_keyframe(self, keyframe_id: str, frame_idx: int):
        if frame_idx not in self.keyframes:
            return
        if keyframe_id not in self.frame2keyid[frame_idx]:
            return
        self.frame2keyid[frame_idx].remove(keyframe_id)
        if len(self.frame2keyid[frame_idx]) == 0:
            del self.frame2keyid[frame_idx]
            self.clear(frame_idx)
        if self.line_segments is not None:
            self.update_line_segments()

    def _get_label_pos(self, frame_idx: int):
        return self.keyframes[frame_idx]

    def remove_interval(self, interval_id: str, start_frame_idx: int, end_frame_idx: int):
        if interval_id in self.interval_line_segments:
            self.server.scene.remove_by_name(self.interval_line_segments[interval_id].name)
            del self.interval_line_segments[interval_id]
        self._remove_interval_and_update_label(interval_id, start_frame_idx, end_frame_idx)

    def _get_sparse_constraint_info(self):
        all_root_pos = []
        for v in self.keyframes.values():
            v_np = to_numpy(v)
            if len(v_np.shape) == 1:
                all_root_pos.append(v_np[None])
            else:
                all_root_pos.append(v_np)
        if len(all_root_pos) > 0:
            all_root_pos = np.concatenate(all_root_pos, axis=0)
        else:
            all_root_pos = None
        return {
            "frame_idx": self.get_frame_idx(),
            "root_pos": all_root_pos,
        }

    def get_constraint_info(self, device: Optional[str] = None):
        if not self.dense_path or len(self.keyframes) == 0:
            info = self._get_sparse_constraint_info()
            return {
                "frame_idx": info["frame_idx"],
                "root_pos": to_torch(info["root_pos"], device=device, dtype=torch.float32),
            }
        else:
            frame_idx_list = self.get_frame_idx()
            min_frame_idx = min(frame_idx_list)
            max_frame_idx = max(frame_idx_list)
            t = np.arange(min_frame_idx, max_frame_idx + 1)
            path3d = self.interpolate_path(t)
            return {
                "frame_idx": t.tolist(),
                "root_pos": to_torch(path3d, device=device, dtype=torch.float32),
            }

    def clear(self, frame_idx: Optional[int] = None):
        frame_idx_list = list(self.keyframes.keys()) if frame_idx is None else [frame_idx]
        for fidx in frame_idx_list:
            scene_data = self.scene_elements.get(fidx, {})
            waypoint = scene_data.get("waypoint")
            if waypoint is not None:
                waypoint.clear()
            label = scene_data.get("label")
            if label is not None:
                self.server.scene.remove_by_name(label.name)
            self.keyframes.pop(fidx)
            self.scene_elements.pop(fidx)
        if frame_idx is None:
            # clear all interval labels if clearing all keyframes
            for interval_label in list(self.interval_labels.values()):
                self.server.scene.remove_by_name(interval_label.name)
            self.interval_labels.clear()
            # clear line segments if turning off dense path
            if self.line_segments is not None:
                self.server.scene.remove_by_name(self.line_segments.name)
                self.line_segments = None
            for interval_line in list(self.interval_line_segments.values()):
                self.server.scene.remove_by_name(interval_line.name)
            self.interval_line_segments.clear()

    def set_overlay_visibility(self, only_frame: Optional[int] = None) -> None:
        show_all = only_frame is None
        for fidx, scene_data in self.scene_elements.items():
            visible = show_all or fidx == only_frame
            waypoint = scene_data.get("waypoint")
            if waypoint is not None:
                waypoint.set_visible(visible)
            label = scene_data.get("label")
            if label is not None:
                label.visible = visible and self.labels_visible
        if self.line_segments is not None:
            self.line_segments.visible = show_all
        for line_handle in self.interval_line_segments.values():
            line_handle.visible = show_all
        for interval_label in self.interval_labels.values():
            interval_label.visible = show_all and self.labels_visible
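
# Worked sketch (hedged): the dense-path interpolation used above, reduced to
# plain numpy/scipy. Frame indices and xz waypoints here are made up, and the
# repo-internal get_smooth_root_pos smoothing step is deliberately omitted.
def _example_dense_root_path():
    import numpy as np
    from scipy.interpolate import interp1d

    frame_idx = np.array([0, 30, 60])  # keyframed frames
    xz = np.array([[0.0, 0.0], [1.0, 0.5], [2.0, 0.0]])  # root xz waypoints
    t = np.arange(frame_idx[0], frame_idx[-1] + 1)
    x_new = interp1d(frame_idx, xz[:, 0], kind="linear")(t)
    z_new = interp1d(frame_idx, xz[:, 1], kind="linear")(t)
    # y (height) stays zero for a projected 2D root path
    return np.stack([x_new, np.zeros_like(x_new), z_new], axis=1)  # [T, 3]
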

#
# GUI Elements that need to be tracked


================================================
FILE: kimodo/viz/coords.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Pure numpy coordinate/rotation helpers for viz."""

import numpy as np


def skew(v: np.ndarray) -> np.ndarray:
    """Skew-symmetric matrix for cross products: skew(v) @ x == np.cross(v, x)."""
    vx, vy, vz = float(v[0]), float(v[1]), float(v[2])
    return np.array([[0.0, -vz, vy], [vz, 0.0, -vx], [-vy, vx, 0.0]], dtype=np.float64)


def rotation_matrix_from_two_vec(v_from: np.ndarray, v_to: np.ndarray, eps: float = 1e-8) -> np.ndarray:
    """Return R such that R @ v_from ~= v_to (both treated as 3D vectors).

    Uses a Rodrigues-style construction, with special handling for near-parallel
    and near-opposite vectors for numerical stability.
    """
    a = np.asarray(v_from, dtype=np.float64).reshape(3)
    b = np.asarray(v_to, dtype=np.float64).reshape(3)
    na = np.linalg.norm(a)
    nb = np.linalg.norm(b)
    if na < eps or nb < eps:
        return np.eye(3, dtype=np.float64)
    a = a / na
    b = b / nb
    c = float(np.clip(np.dot(a, b), -1.0, 1.0))  # cos(theta)
    if c > 1.0 - eps:
        return np.eye(3, dtype=np.float64)
    if c < -1.0 + eps:
        # 180 deg rotation about any axis orthogonal to a:
        # R = -I + 2 * uu^T, where u is a unit axis orthogonal to a.
        axis_seed = np.array([1.0, 0.0, 0.0], dtype=np.float64)
        if abs(float(np.dot(a, axis_seed))) > 0.9:
            axis_seed = np.array([0.0, 1.0, 0.0], dtype=np.float64)
        u = np.cross(a, axis_seed)
        u = u / np.linalg.norm(u).clip(min=eps)
        return -np.eye(3, dtype=np.float64) + 2.0 * np.outer(u, u)
    v = np.cross(a, b)
    s2 = float(np.dot(v, v))  # ||v||^2 == sin^2(theta)
    K = skew(v)
    # R = I + K + K^2 * ((1 - c) / s^2)
    return np.eye(3, dtype=np.float64) + K + (K @ K) * ((1.0 - c) / s2)
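
# Quick numeric check (hedged sketch) for rotation_matrix_from_two_vec above:
# the generic Rodrigues branch, the near-parallel branch, and the 180-degree
# branch should all map v_from onto the direction of v_to.
if __name__ == "__main__":
    for v_from, v_to in [
        ([1.0, 0.0, 0.0], [0.0, 1.0, 0.0]),    # generic 90-degree case
        ([1.0, 0.0, 0.0], [1.0, 1e-12, 0.0]),  # near-parallel -> identity
        ([1.0, 0.0, 0.0], [-1.0, 0.0, 0.0]),   # near-opposite -> 180 degrees
    ]:
        R = rotation_matrix_from_two_vec(np.array(v_from), np.array(v_to))
        b_unit = np.array(v_to) / np.linalg.norm(v_to)
        assert np.allclose(R @ np.array(v_from), b_unit, atol=1e-6)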


================================================
FILE: kimodo/viz/g1_rig.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""G1 robot rig: mesh loading, joint mapping, and viser scene setup for G1 skeleton."""

import os
import xml.etree.ElementTree as ET
from typing import Any, Optional, Tuple

import numpy as np
import trimesh
import viser
import viser.transforms as tf

from kimodo.assets import skeleton_asset_path
from kimodo.skeleton import G1Skeleton34

# MuJoCo (z-up, x-forward) -> kimodo (y-up, z-forward)
MUJOCO_TO_KIMODO = np.array([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0]], dtype=np.float64)

G1_MESH_JOINT_MAP = {
    "pelvis_skel": ["pelvis.STL", "pelvis_contour_link.STL"],
    "left_hip_pitch_skel": ["left_hip_pitch_link.STL"],
    "left_hip_roll_skel": ["left_hip_roll_link.STL"],
    "left_hip_yaw_skel": ["left_hip_yaw_link.STL"],
    "left_knee_skel": ["left_knee_link.STL"],
    "left_ankle_pitch_skel": ["left_ankle_pitch_link.STL"],
    "left_ankle_roll_skel": ["left_ankle_roll_link.STL"],
    "right_hip_pitch_skel": ["right_hip_pitch_link.STL"],
    "right_hip_roll_skel": ["right_hip_roll_link.STL"],
    "right_hip_yaw_skel": ["right_hip_yaw_link.STL"],
    "right_knee_skel": ["right_knee_link.STL"],
    "right_ankle_pitch_skel": ["right_ankle_pitch_link.STL"],
    "right_ankle_roll_skel": ["right_ankle_roll_link.STL"],
    "waist_yaw_skel": ["waist_yaw_link_rev_1_0.STL", "waist_yaw_link.STL"],
    "waist_roll_skel": ["waist_roll_link_rev_1_0.STL", "waist_roll_link.STL"],
    "waist_pitch_skel": [
        "torso_link_rev_1_0.STL",
        "torso_link.STL",
        "logo_link.STL",
        "head_link.STL",
    ],
    "left_shoulder_pitch_skel": ["left_shoulder_pitch_link.STL"],
    "left_shoulder_roll_skel": ["left_shoulder_roll_link.STL"],
    "left_shoulder_yaw_skel": ["left_shoulder_yaw_link.STL"],
    "left_elbow_skel": ["left_elbow_link.STL"],
    "left_wrist_roll_skel": ["left_wrist_roll_link.STL"],
    "left_wrist_pitch_skel": ["left_wrist_pitch_link.STL"],
    "left_wrist_yaw_skel": ["left_wrist_yaw_link.STL", "left_rubber_hand.STL"],
    "right_shoulder_pitch_skel": ["right_shoulder_pitch_link.STL"],
    "right_shoulder_roll_skel": ["right_shoulder_roll_link.STL"],
    "right_shoulder_yaw_skel": ["right_shoulder_yaw_link.STL"],
    "right_elbow_skel": ["right_elbow_link.STL"],
    "right_wrist_roll_skel": ["right_wrist_roll_link.STL"],
    "right_wrist_pitch_skel": ["right_wrist_pitch_link.STL"],
    "right_wrist_yaw_skel": ["right_wrist_yaw_link.STL", "right_rubber_hand.STL"],
}

# Joint axis/limits from g1.xml (used by exports, e.g. MujocoQposConverter)
_G1_JOINT_AXIS_INDEX_CACHE: Optional[dict[str, int]] = None
_G1_JOINT_LIMITS_CACHE: Optional[dict[str, tuple[float, float]]] = None


def _get_g1_joint_axis_indices() -> dict[str, int]:
    """Return a map from G1 joint names to a single rotation axis index."""
    global _G1_JOINT_AXIS_INDEX_CACHE
    if _G1_JOINT_AXIS_INDEX_CACHE is not None:
        return _G1_JOINT_AXIS_INDEX_CACHE
    xml_path = str(skeleton_asset_path("g1skel34", "xml", "g1.xml"))
    if not os.path.exists(xml_path):
        _G1_JOINT_AXIS_INDEX_CACHE = {}
        return _G1_JOINT_AXIS_INDEX_CACHE
    tree = ET.parse(xml_path)
    root = tree.getroot()
    joint_axes = {}
    for xml_class in tree.findall(".//default"):
        if "class" not in xml_class.attrib:
            continue
        joint_nodes = xml_class.findall("joint")
        if joint_nodes:
            joint_axes[xml_class.get("class")] = joint_nodes[0].get("axis")
    axis_indices_by_name: dict[str, int] = {}
    for joint in root.find("worldbody").findall(".//joint"):
        axis_str = joint.get("axis") or joint_axes.get(joint.get("class"))
        if axis_str is None:
            continue
        axis_vals = np.array([float(x) for x in axis_str.split()], dtype=np.float64)
        if not np.any(axis_vals):
            continue
        axis_kimodo = MUJOCO_TO_KIMODO @ axis_vals
        axis_idx = int(np.argmax(np.abs(axis_kimodo)))
        axis_indices_by_name[joint.get("name").replace("_joint", "_skel")] = axis_idx
    _G1_JOINT_AXIS_INDEX_CACHE = axis_indices_by_name
    return _G1_JOINT_AXIS_INDEX_CACHE


def _get_g1_joint_limits() -> dict[str, tuple[float, float]]:
    """Return a map from G1 joint names to (min, max) angle limits in radians."""
    global _G1_JOINT_LIMITS_CACHE
    if _G1_JOINT_LIMITS_CACHE is not None:
        return _G1_JOINT_LIMITS_CACHE
    xml_path = str(skeleton_asset_path("g1skel34", "xml", "g1.xml"))
    if not os.path.exists(xml_path):
        _G1_JOINT_LIMITS_CACHE = {}
        return _G1_JOINT_LIMITS_CACHE
    tree = ET.parse(xml_path)
    root = tree.getroot()
    class_ranges: dict[str, tuple[float, float]] = {}
    for xml_class in tree.findall(".//default"):
        class_name = xml_class.get("class")
        if not class_name:
            continue
        joint_nodes = xml_class.findall("joint")
        if not joint_nodes:
            continue
        range_str = joint_nodes[0].get("range")
        if not range_str:
            continue
        range_vals = [float(x) for x in range_str.split()]
        if len(range_vals) != 2:
            continue
        class_ranges[class_name] = (range_vals[0], range_vals[1])
    joint_limits: dict[str, tuple[float, float]] = {}
    worldbody = root.find("worldbody")
    if worldbody is None:
        _G1_JOINT_LIMITS_CACHE = {}
        return _G1_JOINT_LIMITS_CACHE
    for joint in worldbody.findall(".//joint"):
        range_str = joint.get("range") or class_ranges.get(joint.get("class"))
        if range_str is None:
            continue
        if isinstance(range_str, tuple):
            joint_range = range_str
        else:
            range_vals = [float(x) for x in range_str.split()]
            if len(range_vals) != 2:
                continue
            joint_range = (range_vals[0], range_vals[1])
        joint_name = joint.get("name")
        if not joint_name:
            continue
        joint_limits[joint_name.replace("_joint", "_skel")] = joint_range
    _G1_JOINT_LIMITS_CACHE = joint_limits
    return _G1_JOINT_LIMITS_CACHE


_G1_JOINT_F2Q_DATA_CACHE: Optional[dict[str, dict[str, Any]]] = None


def get_g1_joint_f2q_data(
    skeleton: G1Skeleton34,
) -> dict[str, dict[str, Any]]:
    """Return per-hinge-joint f2q data for correct 1-DoF + limits in offset space.

    Each entry is for a G1 hinge joint (by name) and contains:
    - "offset_f2q": (3, 3) matrix such that R_f2q = offset_f2q @ R_local (kimodo).
    - "axis_f2q": (3,) unit axis in f2q space; angle = dot(axis_angle(R_f2q), axis_f2q).
    - "rest_dof_axis_angle": angle (rad) at T-pose in f2q space; MuJoCo q = angle_f2q - this.

    Limits from the XML apply to q = angle_f2q - rest_dof_axis_angle.
    """
    global _G1_JOINT_F2Q_DATA_CACHE
    if _G1_JOINT_F2Q_DATA_CACHE is not None:
        return _G1_JOINT_F2Q_DATA_CACHE
    from kimodo.exports.mujoco import MujocoQposConverter

    converter = MujocoQposConverter(skeleton)
    # converter: _rot_offsets_f2q[kimodo_idx], _mujoco_joint_axis_values_f2q_space[hinge_idx],
    # _rest_dofs_axis_angle[hinge_idx], _kimodo_indices_to_mujoco_indices[kimodo_idx] = hinge_idx+1 or 0
    out: dict[str, dict[str, Any]] = {}
    for j in range(skeleton.nbjoints):
        mujoco_one_based = converter._kimodo_indices_to_mujoco_indices[j].item()
        if mujoco_one_based <= 0:
            continue
        hinge_idx = mujoco_one_based - 1
        joint_name = skeleton.bone_order_names[j]
        offset_f2q = converter._rot_offsets_f2q[j].detach().cpu().numpy().astype(np.float64)
        axis_f2q = converter._mujoco_joint_axis_values_f2q_space[hinge_idx].detach().cpu().numpy().astype(np.float64)
        n = np.linalg.norm(axis_f2q)
        if n > 1e-10:
            axis_f2q = axis_f2q / n
        rest_dof = float(converter._rest_dofs_axis_angle[hinge_idx].detach().cpu().numpy())
        out[joint_name] = {
            "offset_f2q": offset_f2q,
            "axis_f2q": axis_f2q,
            "rest_dof_axis_angle": rest_dof,
        }
    _G1_JOINT_F2Q_DATA_CACHE = out
    return out
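
# Worked sketch (hedged) of the f2q mapping documented above: recover the
# MuJoCo hinge angle q for one joint from its kimodo local rotation matrix.
# `skeleton`, `joint_name`, and `R_local` are assumed inputs; limits come from
# _get_g1_joint_limits() and default to unbounded when the joint has none.
def _example_hinge_q(skeleton: G1Skeleton34, joint_name: str, R_local: np.ndarray) -> float:
    f2q = get_g1_joint_f2q_data(skeleton)[joint_name]
    R_f2q = f2q["offset_f2q"] @ R_local.astype(np.float64)
    # project the axis-angle vector onto the hinge axis in f2q space
    angle_f2q = float(np.dot(tf.SO3.from_matrix(R_f2q).log(), f2q["axis_f2q"]))
    q = angle_f2q - f2q["rest_dof_axis_angle"]
    lo, hi = _get_g1_joint_limits().get(joint_name, (-np.inf, np.inf))
    return float(np.clip(q, lo, hi))
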

# -----------------------------------------------------------------------------
# Mesh loading cache (shared across G1 rig instances; each rig gets its own scene meshes)
# -----------------------------------------------------------------------------
_G1_MESH_DATA_CACHE: dict[str, list[dict]] = {}


def _load_g1_mesh_data(
    mesh_dir: str,
    skeleton: G1Skeleton34,
) -> list[dict]:
    """Load STL meshes and XML transforms once per mesh_dir; shared across rig instances."""
    if mesh_dir in _G1_MESH_DATA_CACHE:
        return _G1_MESH_DATA_CACHE[mesh_dir]
    mesh_geom_cache = G1MeshRig._mesh_geom_cache
    mesh_transform_cache = G1MeshRig._mesh_transform_cache
    # Load XML-derived transforms (cached inside _get_mesh_local_transforms_impl)
    mesh_file_transforms = _get_mesh_local_transforms_impl(mesh_dir, mesh_transform_cache)
    data_list: list[dict] = []
    for joint_name, mesh_files in G1_MESH_JOINT_MAP.items():
        if joint_name not in skeleton.bone_index:
            continue
        joint_idx = skeleton.bone_index[joint_name]
        for mesh_file in mesh_files:
            mesh_path = os.path.join(mesh_dir, mesh_file)
            if not os.path.exists(mesh_path):
                continue
            vertices, faces = _get_mesh_geometry_impl(mesh_file, mesh_path, mesh_dir, mesh_geom_cache)
            if vertices is None:
                continue
            geom_pos, geom_rot = mesh_file_transforms.get(
                mesh_file,
                (np.zeros(3, dtype=np.float64), np.eye(3, dtype=np.float64)),
            )
            data_list.append(
                {
                    "mesh_file": mesh_file,
                    "vertices": vertices,
                    "faces": faces,
                    "joint_idx": joint_idx,
                    "geom_pos": geom_pos.copy(),
                    "geom_rot": geom_rot.copy(),
                }
            )
    _G1_MESH_DATA_CACHE[mesh_dir] = data_list
    return data_list


def _get_mesh_geometry_impl(
    mesh_file: str,
    mesh_path: str,
    mesh_dir: str,
    mesh_geom_cache: dict,
) -> tuple[Optional[np.ndarray], Optional[np.ndarray]]:
    """Load one STL; result cached per mesh_dir and shared across rigs."""
    cached = mesh_geom_cache.get(mesh_dir)
    if cached is not None and mesh_file in cached:
        vertices, faces = cached[mesh_file]
        return vertices.copy(), faces.copy()
    mesh = trimesh.load_mesh(mesh_path, process=True)
    if isinstance(mesh, trimesh.Scene):
        mesh = trimesh.util.concatenate(mesh.dump())
    vertices = mesh.vertices @ MUJOCO_TO_KIMODO.T
    faces = mesh.faces
    if mesh_dir not in mesh_geom_cache:
        mesh_geom_cache[mesh_dir] = {}
    mesh_geom_cache[mesh_dir][mesh_file] = (vertices, faces)
    return vertices.copy(), faces.copy()


def _get_mesh_local_transforms_impl(
    mesh_dir: str,
    mesh_transform_cache: dict,
) -> dict[str, tuple[np.ndarray, np.ndarray]]:
    """Parse g1.xml once per mesh_dir; result shared across G1 rig instances."""
    cached = mesh_transform_cache.get(mesh_dir)
    if cached is not None:
        return {mesh_file: (pos.copy(), rot.copy()) for mesh_file, (pos, rot) in cached.items()}
    xml_path = os.path.abspath(os.path.join(mesh_dir, "..", "..", "xml", "g1.xml"))
    if not os.path.exists(xml_path):
        return {}
    tree = ET.parse(xml_path)
    root = tree.getroot()
    mesh_file_to_mesh_name = {}
    for mesh in root.findall(".//asset/mesh"):
        mesh_name = mesh.get("name")
        mesh_file = mesh.get("file")
        if mesh_name and mesh_file:
            mesh_file_to_mesh_name[mesh_file] = mesh_name
    mesh_name_to_transform = {}
    for geom in root.findall(".//geom"):
        mesh_name = geom.get("mesh")
        if mesh_name is None:
            continue
        pos = geom.get("pos")
        quat = geom.get("quat")
        if pos is None:
            geom_pos = np.zeros(3, dtype=np.float64)
        else:
            geom_pos = np.array([float(x) for x in pos.split()], dtype=np.float64)
        if quat is None:
            geom_rot = np.eye(3, dtype=np.float64)
        else:
            wxyz = np.array([float(x) for x in quat.split()], dtype=np.float64)
            geom_rot = tf.SO3(wxyz=wxyz).as_matrix()
        mesh_name_to_transform[mesh_name] = (geom_pos, geom_rot)
    mesh_file_transforms = {}
    for mesh_file, mesh_name in mesh_file_to_mesh_name.items():
        geom_pos, geom_rot = mesh_name_to_transform.get(
            mesh_name,
            (np.zeros(3, dtype=np.float64), np.eye(3, dtype=np.float64)),
        )
        geom_pos = MUJOCO_TO_KIMODO @ geom_pos
        geom_rot = MUJOCO_TO_KIMODO @ geom_rot @ MUJOCO_TO_KIMODO.T
        mesh_file_transforms[mesh_file] = (geom_pos, geom_rot)
    mesh_transform_cache[mesh_dir] = {mf: (pos.copy(), rot.copy()) for mf, (pos, rot) in mesh_file_transforms.items()}
    return mesh_file_transforms


class G1MeshRig:
    """Rig for G1 STL meshes.

    Each instance has its own scene meshes (so clear() only removes one character).
    Loading is shared: STL files and g1.xml are cached per mesh_dir via
    _load_g1_mesh_data() and the class-level _mesh_*_cache dicts.
""" _mesh_geom_cache: dict[str, dict[str, tuple[np.ndarray, np.ndarray]]] = {} _mesh_transform_cache: dict[str, dict[str, tuple[np.ndarray, np.ndarray]]] = {} def __init__( self, name: str, server: viser.ViserServer | viser.ClientHandle, skeleton: G1Skeleton34, mesh_dir: str, color: Tuple[int, int, int], ): self.server = server self.skeleton = skeleton self.mesh_dir = mesh_dir self.color = color self.mesh_handles: list[viser.SceneHandle] = [] self.mesh_items: list[dict[str, object]] = [] self._defer_initial_visibility = True data_list = _load_g1_mesh_data(mesh_dir, skeleton) for item in data_list: mesh_file = item["mesh_file"] vertices = item["vertices"] faces = item["faces"] joint_idx = item["joint_idx"] geom_pos = item["geom_pos"] geom_rot = item["geom_rot"] handle = self.server.scene.add_mesh_simple( f"/{name}/g1_mesh/{os.path.splitext(mesh_file)[0]}", vertices=vertices, faces=faces, opacity=None, color=self.color, wireframe=False, visible=not self._defer_initial_visibility, ) self.mesh_handles.append(handle) self.mesh_items.append( { "handle": handle, "joint_idx": joint_idx, "geom_pos": geom_pos, "geom_rot": geom_rot, } ) if self._defer_initial_visibility: for handle in self.mesh_handles: handle.visible = True def set_visibility(self, visible: bool) -> None: for handle in self.mesh_handles: handle.visible = visible def set_opacity(self, opacity: float) -> None: for handle in self.mesh_handles: handle.opacity = opacity def set_wireframe(self, wireframe: bool) -> None: for handle in self.mesh_handles: handle.wireframe = wireframe def set_color(self, color: Tuple[int, int, int]) -> None: self.color = color for handle in self.mesh_handles: handle.color = color def set_pose(self, joints_pos: np.ndarray, joints_rot: np.ndarray) -> None: for item in self.mesh_items: handle = item["handle"] joint_idx = item["joint_idx"] geom_pos = item["geom_pos"] geom_rot = item["geom_rot"] joint_pos = joints_pos[joint_idx] joint_rot = joints_rot[joint_idx] mesh_pos = joint_pos + joint_rot @ geom_pos mesh_rot = joint_rot @ geom_rot handle.position = mesh_pos handle.wxyz = tf.SO3.from_matrix(mesh_rot).wxyz def clear(self) -> None: for handle in self.mesh_handles: self.server.scene.remove_by_name(handle.name) self.mesh_handles = [] self.mesh_items = [] ================================================ FILE: kimodo/viz/gui.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """GUI element handles for the demo app.""" from dataclasses import dataclass import viser @dataclass class GuiElements: gui_play_pause_button: viser.GuiInputHandle gui_next_frame_button: viser.GuiInputHandle gui_prev_frame_button: viser.GuiInputHandle gui_generate_button: viser.GuiInputHandle gui_model_fps: viser.GuiInputHandle[int] gui_timeline: viser.GuiInputHandle[int] gui_viz_skeleton_checkbox: viser.GuiInputHandle[bool] gui_viz_foot_contacts_checkbox: viser.GuiInputHandle[bool] gui_viz_skinned_mesh_checkbox: viser.GuiInputHandle[bool] gui_viz_skinned_mesh_opacity_slider: viser.GuiInputHandle[float] gui_camera_fov_slider: viser.GuiInputHandle[float] # generation controls gui_duration_slider: viser.GuiInputHandle[float] gui_num_samples_slider: viser.GuiInputHandle[int] gui_cfg_checkbox: viser.GuiCheckboxHandle gui_cfg_text_weight_slider: viser.GuiInputHandle[float] gui_cfg_constraint_weight_slider: viser.GuiInputHandle[float] gui_diffusion_steps_slider: viser.GuiInputHandle[int] gui_seed: viser.GuiInputHandle[int] gui_postprocess_checkbox: viser.GuiCheckboxHandle gui_root_margin: viser.GuiInputHandle[float] gui_real_robot_rotations_checkbox: viser.GuiInputHandle[bool] # appearance gui_dark_mode_checkbox: viser.GuiCheckboxHandle # which skinning method to use for SOMA gui_use_soma_layer_checkbox: viser.GuiCheckboxHandle ================================================ FILE: kimodo/viz/playback.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Playback and motion editing: CharacterMotion.""" from typing import Callable, Literal, Optional import numpy as np import torch import viser.transforms as tf from kimodo.skeleton import ( G1Skeleton34, SOMASkeleton30, SOMASkeleton77, batch_rigid_transform, global_rots_to_local_rots, ) from kimodo.tools import to_numpy, to_torch from .g1_rig import ( _get_g1_joint_axis_indices, _get_g1_joint_limits, get_g1_joint_f2q_data, ) from .scene import Character class CharacterMotion: def __init__( self, character: Character, joints_pos: torch.Tensor, joints_rot: torch.Tensor, foot_contacts: Optional[torch.Tensor] = None, ): self.character = character self.server = character.server self.skeleton = character.skeleton self.name = character.name # [T, J, 3] global joint positions self.joints_pos = joints_pos # [T, J, 3, 3] global joint rotation matrices self.joints_rot = joints_rot assert joints_pos.shape[0] == joints_rot.shape[0] # keep track of local rots as well for convenience during pose editing self.joints_local_rot = global_rots_to_local_rots(joints_rot, self.skeleton) self.length = joints_pos.shape[0] self.cur_frame_idx = None self.foot_contacts = foot_contacts if foot_contacts is not None: assert foot_contacts.shape[0] == self.length self.precompute_mesh_info() # gizmos for pose editing self.root_translation_gizmo = None self.updating_root_translation_gizmo = False self.joint_gizmos = None self.updating_joint_gizmos = False self.gizmo_space: Literal["world", "local"] = "local" self._drag_start_world_rot: list = [] self._joint_gizmo_dragging: list[bool] = [] def precompute_mesh_info(self): if self.character.skeleton_mesh is not None: print("Caching skeleton mesh info...") self.character.skeleton_mesh.precompute_mesh_info(self.joints_pos) if self.character.skinned_mesh is not None: print("Caching skinning info...") self.character.precompute_skinning(self.joints_pos, 
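
# Construction sketch (hedged): GuiElements only stores handles; the demo app
# creates them elsewhere with the standard viser GUI API, roughly as below.
# Labels and defaults here are illustrative, not the app's actual values, and
# only a few of the fields are shown.
def _example_gui_handles(server: viser.ViserServer):
    play = server.gui.add_button("Play / Pause")
    timeline = server.gui.add_slider("Frame", min=0, max=100, step=1, initial_value=0)
    skeleton_checkbox = server.gui.add_checkbox("Show skeleton", initial_value=True)
    return play, timeline, skeleton_checkbox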


================================================
FILE: kimodo/viz/playback.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Playback and motion editing: CharacterMotion."""

from typing import Callable, Literal, Optional

import numpy as np
import torch
import viser.transforms as tf

from kimodo.skeleton import (
    G1Skeleton34,
    SOMASkeleton30,
    SOMASkeleton77,
    batch_rigid_transform,
    global_rots_to_local_rots,
)
from kimodo.tools import to_numpy, to_torch

from .g1_rig import (
    _get_g1_joint_axis_indices,
    _get_g1_joint_limits,
    get_g1_joint_f2q_data,
)
from .scene import Character


class CharacterMotion:
    def __init__(
        self,
        character: Character,
        joints_pos: torch.Tensor,
        joints_rot: torch.Tensor,
        foot_contacts: Optional[torch.Tensor] = None,
    ):
        self.character = character
        self.server = character.server
        self.skeleton = character.skeleton
        self.name = character.name
        # [T, J, 3] global joint positions
        self.joints_pos = joints_pos
        # [T, J, 3, 3] global joint rotation matrices
        self.joints_rot = joints_rot
        assert joints_pos.shape[0] == joints_rot.shape[0]
        # keep track of local rots as well for convenience during pose editing
        self.joints_local_rot = global_rots_to_local_rots(joints_rot, self.skeleton)
        self.length = joints_pos.shape[0]
        self.cur_frame_idx = None
        self.foot_contacts = foot_contacts
        if foot_contacts is not None:
            assert foot_contacts.shape[0] == self.length
        self.precompute_mesh_info()
        # gizmos for pose editing
        self.root_translation_gizmo = None
        self.updating_root_translation_gizmo = False
        self.joint_gizmos = None
        self.updating_joint_gizmos = False
        self.gizmo_space: Literal["world", "local"] = "local"
        self._drag_start_world_rot: list = []
        self._joint_gizmo_dragging: list[bool] = []

    def precompute_mesh_info(self):
        if self.character.skeleton_mesh is not None:
            print("Caching skeleton mesh info...")
            self.character.skeleton_mesh.precompute_mesh_info(self.joints_pos)
        if self.character.skinned_mesh is not None:
            print("Caching skinning info...")
            self.character.precompute_skinning(self.joints_pos, self.joints_rot)

    def set_frame(self, idx: int):
        """Sets the pose of the character to the given frame index."""
        idx = min(idx, self.length - 1)  # clamp to last frame
        cur_foot_contacts = self.foot_contacts[idx] if self.foot_contacts is not None else None
        self.character.set_pose(
            self.joints_pos[idx],
            self.joints_rot[idx],
            frame_idx=idx,
            foot_contacts=cur_foot_contacts,
        )
        self.cur_frame_idx = idx
        # update gizmos if frame has changed due to playback
        cur_root_pos = self.joints_pos[self.cur_frame_idx, self.skeleton.root_idx].clone()
        cur_root_pos[1] = 0.0
        if self.root_translation_gizmo is not None and not self.updating_root_translation_gizmo:
            self.root_translation_gizmo.position = cur_root_pos.cpu().numpy()
        if self.joint_gizmos is not None:
            for i, joint_gizmo in enumerate(self.joint_gizmos):
                # Do not push wxyz/position while this gizmo is being dragged;
                # otherwise the client receives e.g. identity and the gizmo snaps back.
                if not self.updating_joint_gizmos and not self._joint_gizmo_dragging[i]:
                    joint_gizmo.position = self.joints_pos[self.cur_frame_idx, i].cpu().numpy()
                    if self.gizmo_space == "world":
                        joint_gizmo.wxyz = (1.0, 0.0, 0.0, 0.0)
                    else:
                        joint_gizmo.wxyz = tf.SO3.from_matrix(self.joints_rot[self.cur_frame_idx, i].cpu().numpy()).wxyz

    def update_pose_at_frame(
        self,
        frame_idx: int,
        joints_pos: Optional[torch.Tensor] = None,
        joints_rot: Optional[torch.Tensor] = None,
        joints_local_rot: Optional[torch.Tensor] = None,
        foot_contacts: Optional[torch.Tensor] = None,
    ):
        """Overwrites one or more of the pose components at the given frame.

        If only a subset of joints_pos, joints_rot, or joints_local_rot are provided,
        the other components will be updated with FK.
        """
        if joints_pos is not None:
            joints_pos = to_torch(joints_pos, device=self.joints_pos.device, dtype=self.joints_pos.dtype)
            self.joints_pos[frame_idx] = joints_pos
            if joints_local_rot is None and joints_rot is None:
                raise NotImplementedError("No IK to update joint rotations accordingly.")
        if joints_rot is not None:
            joints_rot = to_torch(joints_rot, device=self.joints_rot.device, dtype=self.joints_rot.dtype)
            self.joints_rot[frame_idx] = joints_rot
            if joints_local_rot is None:
                # update local rots from global rots
                self.joints_local_rot[frame_idx] = global_rots_to_local_rots(joints_rot, self.skeleton)
            if joints_pos is None:
                # need to update with FK
                new_posed_joints, _ = batch_rigid_transform(
                    self.joints_local_rot[frame_idx : frame_idx + 1],
                    self.skeleton.neutral_joints[None].to(self.joints_local_rot.device),
                    self.skeleton.joint_parents.to(self.joints_local_rot.device),
                    self.skeleton.root_idx,
                )
                new_posed_joints = (
                    new_posed_joints[0]
                    + self.joints_pos[frame_idx, self.skeleton.root_idx : self.skeleton.root_idx + 1]
                    - self.skeleton.neutral_joints[[self.skeleton.root_idx]]
                )
                self.joints_pos[frame_idx] = new_posed_joints
        if joints_local_rot is not None:
            joints_local_rot = to_torch(joints_local_rot, device=self.joints_local_rot.device).to(
                dtype=self.joints_local_rot.dtype
            )
            self.joints_local_rot[frame_idx] = joints_local_rot
            if joints_rot is None or joints_pos is None:
                # need to update with FK
                new_posed_joints, new_global_rots = batch_rigid_transform(
                    self.joints_local_rot[frame_idx : frame_idx + 1],
                    self.skeleton.neutral_joints[None].to(self.joints_local_rot.device),
                    self.skeleton.joint_parents.to(self.joints_local_rot.device),
                    self.skeleton.root_idx,
                )
                new_posed_joints = (
                    new_posed_joints[0]
                    + self.joints_pos[frame_idx, self.skeleton.root_idx : self.skeleton.root_idx + 1]
                    - self.skeleton.neutral_joints[[self.skeleton.root_idx]]
                )
                if joints_rot is None:
                    self.joints_rot[frame_idx] = new_global_rots[0]
                if joints_pos is None:
                    self.joints_pos[frame_idx] = new_posed_joints
        if foot_contacts is not None:
            foot_contacts = to_torch(foot_contacts, device=self.foot_contacts.device).to(dtype=self.foot_contacts.dtype)
            self.foot_contacts[frame_idx] = foot_contacts
        if self.character.skeleton_mesh is not None:
            self.character.skeleton_mesh.update_mesh_info_cache(self.joints_pos[frame_idx], frame_idx)
        if self.character.skinned_mesh is not None:
            self.character.update_skinning_cache(self.joints_pos[frame_idx], self.joints_rot[frame_idx], frame_idx)
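
    # Usage sketch (hedged): edit one frame and let the FK fallback documented
    # above fill in the rest. `cm` is a CharacterMotion instance; the joint
    # index 5 and the identity rotation are illustrative only:
    #   new_local = cm.joints_local_rot[10].clone()
    #   new_local[5] = torch.eye(3, device=new_local.device, dtype=new_local.dtype)
    #   cm.update_pose_at_frame(10, joints_local_rot=new_local)  # FK refreshes joints_pos/joints_rot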

    def clear(self):
        self.character.clear()

    #
    # Editing helpers
    #

    def get_current_projected_root_pos(self) -> np.ndarray:
        """Get the projected root position on the ground at the current frame."""
        root_pos = self.joints_pos[self.cur_frame_idx, self.skeleton.root_idx].clone()
        root_pos[1] = 0.0
        return to_numpy(root_pos)

    def get_projected_root_pos(self, start_frame_idx: int, end_frame_idx: int = None) -> np.ndarray:
        """If requested frames are out of range, simply pads with the last frame to get expected length."""
        if end_frame_idx is None:
            expected_len = 1
        else:
            expected_len = end_frame_idx - start_frame_idx + 1
        if start_frame_idx >= self.length:
            start_frame_idx = self.length - 1
        if end_frame_idx is None or expected_len == 1:
            root_pos = self.joints_pos[start_frame_idx, self.skeleton.root_idx].clone()
            root_pos[1] = 0.0
            return to_numpy(root_pos)
        else:
            if end_frame_idx >= self.length:
                end_frame_idx = self.length - 1
            root_pos = self.joints_pos[start_frame_idx : end_frame_idx + 1, self.skeleton.root_idx].clone()
            root_pos[:, 1] = 0.0
            if root_pos.shape[0] < expected_len:
                # pad with the last root position
                root_pos = torch.cat(
                    [
                        root_pos,
                        root_pos[-1:].repeat(expected_len - root_pos.shape[0], 1),
                    ],
                    dim=0,
                )
            return to_numpy(root_pos)

    def set_projected_root_pos_path(
        self,
        root_pos_path: np.ndarray | torch.Tensor,
        min_frame_idx: int = None,
        max_frame_idx: int = None,
    ):
        """Sets the projected root position path for the character motion.

        Can set only a subset of the path by providing min_frame_idx and max_frame_idx.
        If not provided, will set the full path.

        Args:
            root_pos_path: torch.Tensor, [T, 2] projected root positions
            min_frame_idx: int, optional, minimum frame index to set the path at
            max_frame_idx: int, optional, maximum frame index to set the path at
        """
        if min_frame_idx is not None or max_frame_idx is not None:
            assert (
                min_frame_idx is not None and max_frame_idx is not None
            ), "min_frame_idx and max_frame_idx must be provided if setting path at specific frames"
            if min_frame_idx >= self.length:
                # both are out of bounds
                return
            max_frame_idx = min(max_frame_idx, self.length - 1)
            root_pos_path = root_pos_path[min_frame_idx : max_frame_idx + 1]
        else:
            assert root_pos_path.shape[0] == self.length
            min_frame_idx = 0
            max_frame_idx = self.length - 1
        cur_joints_pos = self.joints_pos.clone()[min_frame_idx : max_frame_idx + 1]
        root_pos_tensor = to_torch(root_pos_path, device=cur_joints_pos.device, dtype=cur_joints_pos.dtype)
        diff = root_pos_tensor - cur_joints_pos[:, self.skeleton.root_idx, [0, 2]]
        cur_joints_pos[:, :, [0, 2]] += diff.unsqueeze(1)
        for frame_idx in range(min_frame_idx, max_frame_idx + 1):
            rel_idx = frame_idx - min_frame_idx
            self.update_pose_at_frame(
                frame_idx,
                joints_pos=cur_joints_pos[rel_idx],
                joints_rot=self.joints_rot[frame_idx],
                joints_local_rot=self.joints_local_rot[frame_idx],
            )
        # update immediately to show changes
        self.set_frame(self.cur_frame_idx)

    def get_joints_pos(self, start_frame_idx: int, end_frame_idx: int = None) -> np.ndarray:
        """If requested frames are out of range, simply pads with the last frame to get expected length."""
        if end_frame_idx is None:
            expected_len = 1
        else:
            expected_len = end_frame_idx - start_frame_idx + 1
        if start_frame_idx >= self.length:
            start_frame_idx = self.length - 1
        if end_frame_idx is None or expected_len == 1:
            return to_numpy(self.joints_pos[start_frame_idx].clone())
        else:
            if end_frame_idx >= self.length:
                end_frame_idx = self.length - 1
            return_joints_pos = self.joints_pos[start_frame_idx : end_frame_idx + 1].clone()
            if return_joints_pos.shape[0] < expected_len:
                # pad with the last pose
                return_joints_pos = torch.cat(
                    [
                        return_joints_pos,
                        return_joints_pos[-1:].repeat(expected_len - return_joints_pos.shape[0], 1, 1),
                    ],
                    dim=0,
                )
            return to_numpy(return_joints_pos)

    def get_joints_rot(self, start_frame_idx: int, end_frame_idx: int = None) -> np.ndarray:
        """If requested frames are out of range, simply pads with the last frame to get expected length."""
        if end_frame_idx is None:
            expected_len = 1
        else:
            expected_len = end_frame_idx - start_frame_idx + 1
        if start_frame_idx >= self.length:
            start_frame_idx = self.length - 1
        if end_frame_idx is None or expected_len == 1:
            return to_numpy(self.joints_rot[start_frame_idx].clone())
        else:
            if end_frame_idx >= self.length:
                end_frame_idx = self.length - 1
            return_joints_rot = self.joints_rot[start_frame_idx : end_frame_idx + 1].clone()
            if return_joints_rot.shape[0] < expected_len:
                # pad with the last pose
                return_joints_rot = torch.cat(
                    [
                        return_joints_rot,
                        return_joints_rot[-1:].repeat(expected_len - return_joints_rot.shape[0], 1, 1, 1),
                    ],
                    dim=0,
                )
            return to_numpy(return_joints_rot)

    def get_current_joints_pos(self) -> torch.Tensor:
        return self.joints_pos[self.cur_frame_idx].clone()

    def get_current_joints_rot(self) -> torch.Tensor:
        return self.joints_rot[self.cur_frame_idx].clone()

    def add_root_translation_gizmo(
        self,
        constraints: dict,
        on_2d_root_drag_end: Optional[Callable[[], None]] = None,
        on_drag_start: Optional[Callable[[], None]] = None,
    ):
        """Create and initialize gizmo to control the root translation.

        When the user drags the root 2D gizmo, path updates are skipped until release.
        Optional on_2d_root_drag_end is called when the drag ends (e.g. to refresh dense path).
        on_drag_start is called when the drag begins (e.g. to snapshot state for undo).
        """
        # TODO: could also allow rotation around y-axis
        self.root_translation_gizmo = self.server.scene.add_transform_controls(
            f"/{self.name}/gizmo_root_translation",
            scale=0.5,
            line_width=2.5,
            active_axes=(True, False, True),  # only allow translation on xz plane
            disable_axes=False,
            disable_sliders=False,
            disable_rotations=True,
            depth_test=False,  # render even when occluded
        )
        init_position = self.get_current_projected_root_pos()
        self.root_translation_gizmo.position = init_position

        @self.root_translation_gizmo.on_drag_start
        def _(_):
            if on_drag_start is not None:
                on_drag_start()

        @self.root_translation_gizmo.on_update
        def _(_):
            self.updating_root_translation_gizmo = True
            # translate to gizmo position
            new_root_pos = to_torch(
                self.root_translation_gizmo.position,
                device=self.joints_pos.device,
            ).to(dtype=self.joints_pos.dtype)
            cur_joints_pos = self.joints_pos[self.cur_frame_idx].clone()
            root_diff = new_root_pos - cur_joints_pos[self.skeleton.root_idx]
            root_diff[1] = 0.0  # don't change height
            cur_joints_pos += root_diff[None]
            self.update_pose_at_frame(
                self.cur_frame_idx,
                joints_pos=cur_joints_pos,
                joints_rot=self.joints_rot[self.cur_frame_idx],
                joints_local_rot=self.joints_local_rot[self.cur_frame_idx],
            )
            self.updating_root_translation_gizmo = False
            # update immediately to show user changes
            self.set_frame(self.cur_frame_idx)
            # update the 2D waypoint constraints as well if there is one
            if "2D Root" in constraints:
                root_2d_constraints = constraints["2D Root"]
                # if there is a constraint at that frame, we want to update it
                frame_idx = self.cur_frame_idx
                if frame_idx in root_2d_constraints.keyframes:
                    for keyframe_id in root_2d_constraints.frame2keyid[frame_idx]:
                        # add will modify the existing constraint
                        # update_path=False during drag to avoid lag; path refreshes on_drag_end
                        root_2d_constraints.add_keyframe(
                            keyframe_id,
                            frame_idx,
                            root_pos=new_root_pos,
                            exists_ok=True,
                            update_path=False,
                        )
            if "Full-Body" in constraints:
                full_body_constraints = constraints["Full-Body"]
                # if there is a constraint at that frame, we want to update it
                frame_idx = self.cur_frame_idx
                if frame_idx in full_body_constraints.keyframes:
                    current_dict = full_body_constraints.keyframes[frame_idx]
                    for keyframe_id in full_body_constraints.frame2keyid[frame_idx]:
                        # add will modify the existing constraint
                        full_body_constraints.add_keyframe(
                            keyframe_id,
                            frame_idx,
                            joints_pos=cur_joints_pos,
                            joints_rot=current_dict["joints_rot"],
                            exists_ok=True,
                        )
            if "End-Effectors" in constraints:
                end_effector_constraints = constraints["End-Effectors"]
                # if there is a constraint at that frame, we want to update it
                frame_idx = self.cur_frame_idx
                if frame_idx in end_effector_constraints.keyframes:
                    current_dict = end_effector_constraints.keyframes[frame_idx]
                    for keyframe_id, _ in end_effector_constraints.frame2keyid[frame_idx]:
                        # add will modify the existing constraint
                        end_effector_constraints.add_keyframe(
                            keyframe_id,
                            frame_idx,
                            joints_pos=cur_joints_pos,
                            joints_rot=current_dict["joints_rot"],
                            joint_names=current_dict["joint_names"],
                            end_effector_type=current_dict["end_effector_type"],
                            exists_ok=True,
                        )

        @self.root_translation_gizmo.on_drag_end
        def _on_drag_end(_):
            # Refresh path visualization and dense path after release.
            if "2D Root" in constraints:
                root_2d = constraints["2D Root"]
                if root_2d.line_segments is not None:
                    root_2d.update_line_segments()
            if on_2d_root_drag_end is not None:
                on_2d_root_drag_end()
active_axes = ( axis_idx != 0, axis_idx != 1, axis_idx != 2, ) joint_visible = True if hidden_gizmo_joints is not None: joint_name = self.skeleton.bone_order_names[joint_idx] joint_visible = joint_name not in hidden_gizmo_joints cur_joint_gizmo = self.server.scene.add_transform_controls( f"/{self.name}/gizmo_joint_{joint_idx}", scale=0.075, line_width=4.0, active_axes=active_axes, disable_axes=disable_axes, disable_sliders=disable_sliders, disable_rotations=False, depth_test=False, # render even when occluded position=self.joints_pos[self.cur_frame_idx, joint_idx].cpu().numpy(), wxyz=joints_wxyzs[joint_idx], visible=joint_visible, space=space, ) self.joint_gizmos.append(cur_joint_gizmo) def set_callback_in_closure(i: int) -> None: @cur_joint_gizmo.on_drag_start def _on_drag_start(_) -> None: if on_drag_start is not None: on_drag_start() self._joint_gizmo_dragging[i] = True if self.gizmo_space == "world": self._drag_start_world_rot[i] = self.joints_rot[self.cur_frame_idx, i].clone().cpu().numpy() @cur_joint_gizmo.on_drag_end def _on_drag_end(_) -> None: self._joint_gizmo_dragging[i] = False # Force-sync so the client always receives the reset (viser setter skips on allclose). # Use self.joint_gizmos[i] (not cur_joint_gizmo) to avoid the # closure-in-loop bug: cur_joint_gizmo would point to the last handle. gizmo = self.joint_gizmos[i] gizmo.sync_position(self.joints_pos[self.cur_frame_idx, i].cpu().numpy()) if self.gizmo_space == "world": gizmo.sync_wxyz((1.0, 0.0, 0.0, 0.0)) else: gizmo.sync_wxyz(tf.SO3.from_matrix(self.joints_rot[self.cur_frame_idx, i].cpu().numpy()).wxyz) self.set_frame(self.cur_frame_idx) @cur_joint_gizmo.on_update def _(_) -> None: self.updating_joint_gizmos = True new_local_joint_rots = self.joints_local_rot[self.cur_frame_idx].clone() # Gizmo parent is identity; client sends rotation as wxyz. # World mode: wxyz is cumulative from identity, compose with # stored initial world rotation. Local mode: wxyz is new world rotation. gizmo_rot_mat = tf.SO3(self.joint_gizmos[i].wxyz).as_matrix() if self.gizmo_space == "world" and self._drag_start_world_rot[i] is not None: new_world_rot_mat = gizmo_rot_mat @ self._drag_start_world_rot[i] else: new_world_rot_mat = gizmo_rot_mat parent_idx = self.skeleton.joint_parents[i].item() if parent_idx >= 0: R_parent_world = self.joints_rot[self.cur_frame_idx, parent_idx].detach().cpu().numpy() new_local_rot_mat_np = (R_parent_world.T @ new_world_rot_mat).astype(np.float32) else: new_local_rot_mat_np = new_world_rot_mat.astype(np.float32) new_local_rot = tf.SO3.from_matrix(new_local_rot_mat_np) joint_name = self.skeleton.bone_order_names[i] if joint_f2q_data is not None and joint_name in joint_f2q_data: # G1 hinge: use offset (f2q) space so 1-DoF and limits match the robot. # R_f2q = offset_f2q @ R_local; angle_f2q = dot(axis_angle(R_f2q), axis_f2q); # MuJoCo q = angle_f2q - rest_dof; limits apply to q. f2q = joint_f2q_data[joint_name] offset_f2q = f2q["offset_f2q"] axis_f2q = f2q["axis_f2q"] rest_dof = f2q["rest_dof_axis_angle"] R_local = new_local_rot_mat_np.astype(np.float64) R_f2q = offset_f2q @ R_local rotvec = tf.SO3.from_matrix(R_f2q).log() angle_f2q = float(np.dot(rotvec, axis_f2q)) # Keep angle continuous relative to current pose. 
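# Worked example (added, illustrative numbers): if the current pose sits at
# current_angle_f2q = +3.0 rad and the fresh log() returns angle_f2q = -3.1 rad,
# then round((3.0 - (-3.1)) / (2*pi)) = round(0.97) = 1, so the unwrapped angle
# becomes -3.1 + 2*pi ~= +3.18 rad: the equivalent angle closest to the current
# pose, avoiding a sudden flip across the +/-pi boundary of log().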
current_R_f2q = offset_f2q @ ( self.joints_local_rot[self.cur_frame_idx, i].detach().cpu().numpy().astype(np.float64) ) current_angle_f2q = float(np.dot(tf.SO3.from_matrix(current_R_f2q).log(), axis_f2q)) two_pi = 2.0 * np.pi angle_f2q = angle_f2q + two_pi * np.round((current_angle_f2q - angle_f2q) / two_pi) q = angle_f2q - rest_dof if joint_limits is not None: joint_limit = joint_limits.get(joint_name) if joint_limit is not None: q = float(np.clip(q, joint_limit[0], joint_limit[1])) angle_f2q = q + rest_dof R_f2q_new = tf.SO3.exp(angle_f2q * axis_f2q).as_matrix() new_local_rot_mat_np = (offset_f2q.T @ R_f2q_new).astype(np.float32) elif joint_axis_indices is not None: axis_idx = joint_axis_indices.get(joint_name) if axis_idx is not None: rotvec = new_local_rot.log() axis = np.zeros(3, dtype=np.float64) axis[axis_idx] = 1.0 angle = float(rotvec[axis_idx]) # Keep angle continuous relative to current pose. current_rot = tf.SO3.from_matrix( self.joints_local_rot[self.cur_frame_idx, i].detach().cpu().numpy() ) current_angle = float(current_rot.log()[axis_idx]) two_pi = 2.0 * np.pi angle = angle + two_pi * np.round((current_angle - angle) / two_pi) if joint_limits is not None: joint_limit = joint_limits.get(joint_name) if joint_limit is not None: angle = float(np.clip(angle, joint_limit[0], joint_limit[1])) new_local_rot_mat_np = tf.SO3.exp(angle * axis).as_matrix() new_local_rot_mat = torch.tensor(new_local_rot_mat_np).to(new_local_joint_rots.device) new_local_joint_rots[i] = new_local_rot_mat self.update_pose_at_frame( self.cur_frame_idx, joints_local_rot=new_local_joint_rots, ) # handle root translation separately cur_joints_pos = self.joints_pos[self.cur_frame_idx].clone() if i == self.skeleton.root_idx: new_root_pos = to_torch( self.joint_gizmos[i].position, device=self.joints_pos.device, ).to(dtype=self.joints_pos.dtype) root_diff = new_root_pos - self.joints_pos[self.cur_frame_idx, i] if torch.norm(root_diff) > 1e-3: # the root translation has been changed # translate to gizmo position cur_joints_pos += root_diff[None] self.update_pose_at_frame( self.cur_frame_idx, joints_pos=cur_joints_pos, joints_rot=self.joints_rot[self.cur_frame_idx], joints_local_rot=self.joints_local_rot[self.cur_frame_idx], ) # update immediately to show user changes. Keep updating_joint_gizmos # True so set_frame does not overwrite gizmo wxyz mid-drag. 
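# Added note: the ordering below is deliberate. updating_joint_gizmos is cleared
# only after set_frame returns; if it were cleared first, set_frame would push a
# recomputed wxyz/position to the client mid-drag and the gizmo would visibly
# snap back to the last committed pose.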
self.set_frame(self.cur_frame_idx) self.updating_joint_gizmos = False if i == self.skeleton.root_idx: # update the 2D waypoint constraints as well if there is one if "2D Root" in constraints: root_2d_constraints = constraints["2D Root"] # if there is a constraint at that frame, we want to update it frame_idx = self.cur_frame_idx if frame_idx in root_2d_constraints.keyframes: new_root_pos[1] = 0.0 # force y to 0 for keyframe_id in root_2d_constraints.frame2keyid[frame_idx]: # add will modify the existing constraint root_2d_constraints.add_keyframe( keyframe_id, frame_idx, root_pos=new_root_pos, exists_ok=True, update_path=False, ) if "Full-Body" in constraints: full_body_constraints = constraints["Full-Body"] # if there is a constraint at that frame, we want to update it frame_idx = self.cur_frame_idx if frame_idx in full_body_constraints.keyframes: for keyframe_id in full_body_constraints.frame2keyid[frame_idx]: # add will modify the existing constraint full_body_constraints.add_keyframe( keyframe_id, frame_idx, joints_pos=self.joints_pos[frame_idx], joints_rot=self.joints_rot[frame_idx], exists_ok=True, ) if "End-Effectors" in constraints: end_effector_constraints = constraints["End-Effectors"] # if there is a constraint at that frame, we want to update it frame_idx = self.cur_frame_idx if frame_idx in end_effector_constraints.keyframes: current_dict = end_effector_constraints.keyframes[frame_idx] for keyframe_id, _ in end_effector_constraints.frame2keyid[frame_idx]: # add will modify the existing constraint end_effector_constraints.add_keyframe( keyframe_id, frame_idx, joints_pos=self.joints_pos[frame_idx], joints_rot=self.joints_rot[frame_idx], joint_names=current_dict["joint_names"], end_effector_type=current_dict["end_effector_type"], exists_ok=True, ) set_callback_in_closure(joint_idx) def clear_all_gizmos(self): self.updating_root_translation_gizmo = True self.updating_joint_gizmos = True if self.root_translation_gizmo is not None: self.server.scene.remove_by_name(self.root_translation_gizmo.name) self.root_translation_gizmo = None if self.joint_gizmos is not None: for joint_gizmo in self.joint_gizmos: self.server.scene.remove_by_name(joint_gizmo.name) self.joint_gizmos = None self._drag_start_world_rot = [] self._joint_gizmo_dragging = [] self.updating_root_translation_gizmo = False self.updating_joint_gizmos = False ================================================ FILE: kimodo/viz/scene.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 """Viser scene entities: waypoints, skeleton mesh, and character.""" import os import traceback from pathlib import Path from typing import Optional, Tuple import numpy as np import torch import trimesh import viser import viser.transforms as tf from kimodo.skeleton import ( G1Skeleton34, SkeletonBase, SMPLXSkeleton22, SOMASkeleton30, SOMASkeleton77, ) from .coords import rotation_matrix_from_two_vec from .g1_rig import ( G1MeshRig, ) from .smplx_skin import SMPLXSkin from .soma_skin import SOMASkin class WaypointMesh: def __init__( self, name: str, server: viser.ViserServer, position: np.ndarray, heading: Optional[np.ndarray] = None, color: Optional[Tuple[int, int, int]] = (255, 0, 0), ): self.server = server sphere = trimesh.creation.icosphere(subdivisions=3, radius=0.025) annulus = trimesh.creation.annulus(r_min=0.1, r_max=0.2, height=0.005) z_to_y_up = np.array([[1, 0, 0], [0, 0, 1], [0, -1, 0]]) annulus_vertices = annulus.vertices @ z_to_y_up self.sphere = self.server.scene.add_mesh_simple( name=f"{name}/sphere", vertices=sphere.vertices, faces=sphere.faces, position=position, color=color, ) self.annulus = self.server.scene.add_mesh_simple( name=f"{name}/annulus", vertices=annulus_vertices, faces=annulus.faces, position=position, color=color, ) self.arrow_base = None self.arrow_head = None if heading is not None: assert heading.shape == (2,), "Heading must be a 2D vector" heading = 0.3 * (heading / np.linalg.norm(heading)) heading_3d = np.array([heading[0], 0, heading[1]]) arrow_base = trimesh.creation.cylinder(radius=0.01, height=0.3) arrow_head = trimesh.creation.cone(radius=0.03, height=0.075) arrow_base_vertices = arrow_base.vertices arrow_head_vertices = arrow_head.vertices self.arrow_base = self.server.scene.add_mesh_simple( name=f"{name}/arrow_base", vertices=arrow_base_vertices, faces=arrow_base.faces, position=position + (heading_3d / 2), color=color, ) self.arrow_head = self.server.scene.add_mesh_simple( name=f"{name}/arrow_head", vertices=arrow_head_vertices, faces=arrow_head.faces, position=position + heading_3d, color=color, ) def update_position(self, position: np.ndarray, heading: Optional[np.ndarray] = None): self.sphere.position = position self.annulus.position = position if heading is not None: assert heading.shape == (2,), "Heading must be a 2D vector" heading = 0.3 * (heading / np.linalg.norm(heading)) heading_3d = np.array([heading[0], 0, heading[1]]) if self.arrow_base is not None: self.arrow_base.position = position + (heading_3d / 2) if self.arrow_head is not None: self.arrow_head.position = position + heading_3d def clear(self): self.server.scene.remove_by_name(self.sphere.name) self.server.scene.remove_by_name(self.annulus.name) if self.arrow_base is not None: self.server.scene.remove_by_name(self.arrow_base.name) if self.arrow_head is not None: self.server.scene.remove_by_name(self.arrow_head.name) def set_visible(self, visible: bool) -> None: self.sphere.visible = visible self.annulus.visible = visible if self.arrow_base is not None: self.arrow_base.visible = visible if self.arrow_head is not None: self.arrow_head.visible = visible class SkeletonMesh: def __init__( self, name: str, server: viser.ViserServer, skeleton: SkeletonBase, joint_color: Optional[Tuple[float, float, float] | np.ndarray] = ( 255, 235, 0, ), bone_color: Optional[Tuple[float, float, float] | np.ndarray] = ( 27, 106, 0, ), starting_joints_pos: Optional[torch.Tensor] = None, ): """ name: str, name of the skeleton mesh server: viser.ViserServer, server 
to add the skeleton mesh to skeleton: SkeletonBase, skeleton to visualize joint_color: Optional[Tuple[float, float, float] | np.ndarray], color of the joints bone_color: Optional[Tuple[float, float, float] | np.ndarray], color of the bones starting_joints_pos: Optional[torch.Tensor], starting joint positions """ self.server = server self.skeleton = skeleton joint_mesh = trimesh.creation.icosphere(subdivisions=3, radius=0.02) bone_mesh = trimesh.creation.cylinder(radius=0.01, height=1.0) init_joints_pos = skeleton.neutral_joints.clone() self.num_joints = init_joints_pos.shape[0] num_bones = self.num_joints - 1 non_root_bones = [ joint_name for joint_name, parent_name in self.skeleton.bone_order_names_with_parents if parent_name is not None ] self.bone_to_idx = {bone_name: idx for idx, bone_name in enumerate(non_root_bones)} # initialize meshes init_joints_wxyzs = np.concatenate([np.ones((self.num_joints, 1)), np.zeros((self.num_joints, 3))], axis=1) if isinstance(joint_color, tuple): self.joint_colors = np.full((self.num_joints, 3), joint_color) elif isinstance(joint_color, np.ndarray): assert joint_color.shape == ( self.num_joints, 3, ), "Joint colors must be (J, 3)" self.joint_colors = joint_color joint_scales = np.ones((self.num_joints, 3)) hand_roots = {"LeftHand", "RightHand"} finger_joint_names = set(skeleton.left_hand_joint_names + skeleton.right_hand_joint_names) - hand_roots for jname in finger_joint_names: if jname in skeleton.bone_index: joint_scales[skeleton.bone_index[jname]] = 0.6 self.joint_scales = joint_scales self.joints_batched_mesh = server.scene.add_batched_meshes_simple( f"{name}/joints", vertices=joint_mesh.vertices, faces=joint_mesh.faces, batched_wxyzs=init_joints_wxyzs, batched_positions=np.zeros((self.num_joints, 3)), batched_scales=joint_scales, batched_colors=self.joint_colors, ) init_bones_wxyzs = np.concatenate([np.ones((num_bones, 1)), np.zeros((num_bones, 3))], axis=1) if isinstance(bone_color, tuple): bone_color = np.full((num_bones, 3), bone_color) elif isinstance(bone_color, np.ndarray): assert bone_color.shape == (num_bones, 3), "Bone colors must be (J-1, 3)" self.bones_batched_mesh = server.scene.add_batched_meshes_simple( f"{name}/bones", vertices=bone_mesh.vertices, faces=bone_mesh.faces, batched_wxyzs=init_bones_wxyzs, batched_positions=np.zeros((num_bones, 3)), batched_scales=np.ones((num_bones, 3)), batched_colors=bone_color, ) self.mesh_info_cache = None if starting_joints_pos is not None: self.set_pose(starting_joints_pos) else: if isinstance(skeleton, SOMASkeleton77): skel30 = SOMASkeleton30(load=True) min_height = skel30.neutral_joints[:, 1].min().item() else: min_height = init_joints_pos[:, 1].min().item() init_joints_pos[:, 1] -= min_height # move to be on ground self.set_pose(init_joints_pos) def compute_single_pose(self, joints_pos: np.ndarray): """Compute the mesh for a single frame. joints_pos: [J, 3] global joint positions.
""" new_batched_positions = np.zeros((self.skeleton.nbjoints - 1, 3)) new_batched_wxyzs = np.zeros((self.skeleton.nbjoints - 1, 4)) new_batched_scales = np.ones((self.skeleton.nbjoints - 1, 3)) for joint_name, parent_name in self.skeleton.bone_order_names_with_parents: if parent_name is None: continue joint_idx = self.skeleton.bone_index[joint_name] parent_idx = self.skeleton.bone_index[parent_name] joint_pos = joints_pos[joint_idx] parent_pos = joints_pos[parent_idx] bone_pos = (joint_pos + parent_pos) / 2.0 bone_scale = np.linalg.norm(joint_pos - parent_pos) if bone_scale < 1e-8: bone_wxyz = np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float64) else: bone_dir = (joint_pos - parent_pos) / bone_scale R = rotation_matrix_from_two_vec(np.array([0.0, 0.0, 1.0], dtype=np.float64), bone_dir) bone_wxyz = tf.SO3.from_matrix(R).wxyz bone_idx = self.bone_to_idx[joint_name] new_batched_positions[bone_idx] = bone_pos new_batched_wxyzs[bone_idx] = bone_wxyz new_batched_scales[bone_idx] = np.array([1.0, 1.0, bone_scale], dtype=float) return new_batched_positions, new_batched_wxyzs, new_batched_scales def precompute_mesh_info(self, joints_pos: torch.Tensor): """Precompute the meshes for all frames at once. joints_pos: [T, J, 3]. """ joints_pos = joints_pos.cpu().numpy() num_frames = joints_pos.shape[0] self.mesh_info_cache = { "positions": np.zeros((num_frames, self.skeleton.nbjoints - 1, 3)), "wxyzs": np.zeros((num_frames, self.skeleton.nbjoints - 1, 4)), "scales": np.ones((num_frames, self.skeleton.nbjoints - 1, 3)), } for i in range(num_frames): new_batched_positions, new_batched_wxyzs, new_batched_scales = self.compute_single_pose(joints_pos[i]) self.mesh_info_cache["positions"][i] = new_batched_positions self.mesh_info_cache["wxyzs"][i] = new_batched_wxyzs self.mesh_info_cache["scales"][i] = new_batched_scales def update_mesh_info_cache(self, joints_pos: torch.Tensor, frame_idx: int): """Update the mesh info cache for the given frame.""" assert self.mesh_info_cache is not None new_batched_positions, new_batched_wxyzs, new_batched_scales = self.compute_single_pose( joints_pos.cpu().numpy() ) self.mesh_info_cache["positions"][frame_idx] = new_batched_positions self.mesh_info_cache["wxyzs"][frame_idx] = new_batched_wxyzs self.mesh_info_cache["scales"][frame_idx] = new_batched_scales def set_pose( self, joints_pos: torch.Tensor, foot_contacts: Optional[torch.Tensor] = None, frame_idx: Optional[int] = None, ): """Set pose from [J, 3] global joint positions.""" self.cur_joints_pos = joints_pos joints_pos = joints_pos.cpu().numpy() if self.mesh_info_cache is not None: assert frame_idx is not None new_batched_positions = self.mesh_info_cache["positions"][frame_idx] new_batched_wxyzs = self.mesh_info_cache["wxyzs"][frame_idx] new_batched_scales = self.mesh_info_cache["scales"][frame_idx] else: new_batched_positions, new_batched_wxyzs, new_batched_scales = self.compute_single_pose(joints_pos) self.bones_batched_mesh.batched_positions = new_batched_positions self.bones_batched_mesh.batched_wxyzs = new_batched_wxyzs self.bones_batched_mesh.batched_scales = new_batched_scales self.joints_batched_mesh.batched_positions = joints_pos if foot_contacts is not None: cur_joint_colors = self.joint_colors.copy() foot_contacts = foot_contacts.bool().cpu().numpy().astype(bool) foot_joints = np.array(self.skeleton.foot_joint_idx, dtype=int) contact_idx = foot_joints[foot_contacts] cur_joint_colors[contact_idx] = (255, 0, 0) self.joints_batched_mesh.batched_colors = cur_joint_colors else: self.joints_batched_mesh.batched_colors 
= self.joint_colors def set_visibility(self, visible: bool): self.joints_batched_mesh.visible = visible self.bones_batched_mesh.visible = visible def get_pose(self) -> np.ndarray: return self.cur_joints_pos def clear(self): names = [mesh.name for mesh in [self.joints_batched_mesh, self.bones_batched_mesh]] for name in names: self.server.scene.remove_by_name(name) LIGHT_THEME = dict( mesh=(152, 189, 255), ) DARK_THEME = dict( mesh=(100, 135, 195), ) SKIN_CACHE = {} class Character: def __init__( self, name: str, server: viser.ViserServer | viser.ClientHandle, skeleton: SkeletonBase, create_skeleton_mesh: bool = True, create_skinned_mesh: bool = True, visible_skeleton: bool = False, visible_skinned_mesh: bool = True, skinned_mesh_opacity: float = 1.0, show_foot_contacts: bool = True, dark_mode: bool = False, mesh_mode: Optional[str] = None, gui_use_soma_layer_checkbox: Optional[viser.GuiCheckboxHandle] = None, ): self.server = server self.name = name self.skeleton = skeleton self.cur_joints_pos = None self.cur_joints_rot = None self.cur_foot_contacts = None self.skeleton_mesh = None self.show_foot_contacts = show_foot_contacts if create_skeleton_mesh: self.skeleton_mesh = SkeletonMesh(f"/{name}/skeleton", server, skeleton) self.cur_joints_pos = self.skeleton_mesh.get_pose() self.skeleton_mesh.set_visibility(visible_skeleton) self.skinned_mesh = None self.skin = None self.mesh_mode = mesh_mode self.g1_mesh_rig = None if create_skinned_mesh: if isinstance(self.skeleton, (SOMASkeleton30, SOMASkeleton77)) and mesh_mode in [ "soma_skin", "soma_layer_skin", ]: if mesh_mode in SKIN_CACHE: # already okay pass else: if mesh_mode == "soma_layer_skin": try: # try importing the lib from .soma_layer_skin import SOMASkin as SOMASkin_SOMA if mesh_mode not in SKIN_CACHE: SKIN_CACHE[mesh_mode] = SOMASkin_SOMA(self.skeleton) except (ModuleNotFoundError, FileNotFoundError) as e: if isinstance(e, ModuleNotFoundError): msg = "SOMA layer skin is unavailable: the soma package is not installed." else: msg = "SOMA layer skin is unavailable: SOMA asset files are missing." 
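# Added note: this except block degrades gracefully instead of aborting: the
# traceback is printed for debugging, the client gets a notification, the GUI
# checkbox is reset so it reflects the active mode, and mesh_mode falls back to
# the bundled "soma_skin" path handled just below.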
traceback.print_exc() if hasattr(self.server, "add_notification"): self.server.add_notification( "SOMA layer skin unavailable", msg, auto_close_seconds=5.0, with_close_button=True, ) if gui_use_soma_layer_checkbox is not None: gui_use_soma_layer_checkbox.value = False mesh_mode = "soma_skin" # another if, in case mesh_mode changed if mesh_mode == "soma_skin" and mesh_mode not in SKIN_CACHE: SKIN_CACHE[mesh_mode] = SOMASkin(self.skeleton) self.skin = SKIN_CACHE[mesh_mode] self.skinned_mesh = server.scene.add_mesh_simple( f"/{name}/simple_skinned", vertices=self.skin.bind_vertices.cpu().numpy(), faces=self.skin.faces.cpu().numpy(), opacity=None, color=LIGHT_THEME["mesh"] if not dark_mode else DARK_THEME["mesh"], wireframe=False, visible=False, ) self.skinned_verts_cache = None bind_pos = self.skeleton.neutral_joints.clone() if isinstance(self.skeleton, SOMASkeleton77): skel30 = SOMASkeleton30(load=True) min_height = skel30.neutral_joints[:, 1].min().item() else: min_height = bind_pos[:, 1].min().item() bind_pos[:, 1] -= min_height bind_pos[:, 1] += 0.02 bind_rotmat = torch.eye(3, device=bind_pos.device).repeat(bind_pos.shape[0], 1, 1) self.set_pose(bind_pos, bind_rotmat) self.skinned_mesh.visible = True self.set_skinned_mesh_visibility(visible_skinned_mesh) self.set_skinned_mesh_opacity(skinned_mesh_opacity) elif isinstance(self.skeleton, SMPLXSkeleton22) and mesh_mode == "smplx_skin": if mesh_mode not in SKIN_CACHE: SKIN_CACHE[mesh_mode] = SMPLXSkin(self.skeleton) self.skin = SKIN_CACHE[mesh_mode] self.skinned_mesh = server.scene.add_mesh_simple( f"/{name}/simple_skinned", vertices=self.skin.bind_vertices.cpu().numpy(), faces=self.skin.faces.cpu().numpy(), opacity=None, color=LIGHT_THEME["mesh"] if not dark_mode else DARK_THEME["mesh"], wireframe=False, visible=False, ) self.skinned_verts_cache = None bind_pos = self.skeleton.neutral_joints.clone() min_height = bind_pos[:, 1].min().item() bind_pos[:, 1] -= min_height bind_rotmat = torch.eye(3, device=bind_pos.device).repeat(bind_pos.shape[0], 1, 1) self.set_pose(bind_pos, bind_rotmat) self.skinned_mesh.visible = True self.set_skinned_mesh_visibility(visible_skinned_mesh) self.set_skinned_mesh_opacity(skinned_mesh_opacity) elif isinstance(self.skeleton, G1Skeleton34) and mesh_mode == "g1_stl": g1_mesh_dir = Path(self.skeleton.folder) / "meshes/g1" if not os.path.exists(g1_mesh_dir): raise ValueError(f"G1 mesh directory not found: {g1_mesh_dir}") self.g1_mesh_rig = G1MeshRig( name, server, self.skeleton, str(g1_mesh_dir), DARK_THEME["mesh"] if dark_mode else LIGHT_THEME["mesh"], ) init_joints_rot = self.skeleton.rest_pose_local_rot.clone() init_global_joint_rots, _, init_joints_pos = self.skeleton.fk( init_joints_rot, torch.zeros(3, device=init_joints_rot.device, dtype=init_joints_rot.dtype), ) min_height = init_joints_pos[:, 1].min().item() init_joints_pos[:, 1] -= min_height self.set_pose(init_joints_pos, init_global_joint_rots) self.set_skinned_mesh_visibility(visible_skinned_mesh) self.set_skinned_mesh_opacity(skinned_mesh_opacity) else: raise ValueError( "Unsupported mesh mode for skeleton type: " f"{type(self.skeleton).__name__} with mesh_mode={mesh_mode}" ) def change_theme(self, is_dark_mode): color = DARK_THEME["mesh"] if is_dark_mode else LIGHT_THEME["mesh"] if self.skinned_mesh is not None: self.skinned_mesh.color = color if self.g1_mesh_rig is not None: self.g1_mesh_rig.set_color(color) def set_skeleton_visibility(self, visible: bool): if self.skeleton_mesh is not None: self.skeleton_mesh.set_visibility(visible) def 
set_show_foot_contacts(self, show: bool, frame_idx: Optional[int] = None): self.show_foot_contacts = show if self.skeleton_mesh is not None and self.cur_joints_pos is not None: fc = self.cur_foot_contacts if show else None self.skeleton_mesh.set_pose(self.cur_joints_pos, foot_contacts=fc, frame_idx=frame_idx) def set_skinned_mesh_visibility(self, visible: bool): if self.skinned_mesh is not None: self.skinned_mesh.visible = visible if self.g1_mesh_rig is not None: self.g1_mesh_rig.set_visibility(visible) def set_skinned_mesh_opacity(self, opacity: float): if self.skinned_mesh is not None: self.skinned_mesh.opacity = opacity if self.g1_mesh_rig is not None: self.g1_mesh_rig.set_opacity(opacity) def set_skinned_mesh_wireframe(self, wireframe: bool): if self.skinned_mesh is not None: self.skinned_mesh.wireframe = wireframe if self.g1_mesh_rig is not None: self.g1_mesh_rig.set_wireframe(wireframe) def precompute_skinning(self, joints_pos: torch.Tensor, joints_rot: torch.Tensor, chunk_size: int = 64): """Precompute skinning for all frames, processing in chunks to avoid OOM. joints_pos: [T, J, 3], joints_rot: [T, J, 3, 3]. The LBS gather intermediate is ~V*W*48 bytes per frame (V=18k, W=8 for SOMA gives ~7 MB/frame), so a chunk of 64 peaks around ~1 GB -- safe alongside a loaded text encoder + diffusion model on a typical 24 GB GPU. """ assert self.skin is not None T = joints_pos.shape[0] with torch.no_grad(): if T <= chunk_size: self.skinned_verts_cache = self.skin.skin(joints_rot, joints_pos, rot_is_global=True).cpu().numpy() else: chunks = [] for start in range(0, T, chunk_size): end = min(start + chunk_size, T) verts = self.skin.skin(joints_rot[start:end], joints_pos[start:end], rot_is_global=True).cpu().numpy() chunks.append(verts) self.skinned_verts_cache = np.concatenate(chunks, axis=0) def update_skinning_cache(self, joints_pos: torch.Tensor, joints_rot: torch.Tensor, frame_idx: int): """Update skinning cache for one frame.""" if self.skinned_verts_cache is None: return with torch.no_grad(): new_skinned_verts = self.skin.skin(joints_rot[None], joints_pos[None], rot_is_global=True)[0].cpu().numpy() self.skinned_verts_cache[frame_idx] = new_skinned_verts def set_pose( self, joints_pos: torch.Tensor, joints_rot: torch.Tensor, foot_contacts: Optional[torch.Tensor] = None, frame_idx: Optional[int] = None, ): if self.skeleton_mesh is not None: self.cur_foot_contacts = foot_contacts display_fc = foot_contacts if self.show_foot_contacts else None self.skeleton_mesh.set_pose(joints_pos, foot_contacts=display_fc, frame_idx=frame_idx) if self.skinned_mesh is not None: if self.skinned_verts_cache is not None: assert frame_idx is not None skinned_verts = self.skinned_verts_cache[frame_idx] else: with torch.no_grad(): skinned_verts = self.skin.skin(joints_rot[None], joints_pos[None], rot_is_global=True)[0].cpu().numpy() self.skinned_mesh.vertices = skinned_verts if self.g1_mesh_rig is not None: joints_pos_np = joints_pos.detach().cpu().numpy() joints_rot_np = joints_rot.detach().cpu().numpy() self.g1_mesh_rig.set_pose(joints_pos_np, joints_rot_np) self.cur_joints_pos = joints_pos self.cur_joints_rot = joints_rot def get_pose(self) -> Tuple[torch.Tensor, torch.Tensor]: return self.cur_joints_pos, self.cur_joints_rot def clear(self): if self.skeleton_mesh is not None: self.skeleton_mesh.clear() if self.skinned_mesh is not None: self.server.scene.remove_by_name(self.skinned_mesh.name) if self.g1_mesh_rig is not None: self.g1_mesh_rig.clear() ================================================ FILE: kimodo/viz/smplx_skin.py
================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """SMPL-X skinning and joint mapping for visualization.""" import os import warnings from pathlib import Path import numpy as np import torch from kimodo.geometry import axis_angle_to_matrix from kimodo.skeleton import SMPLXSkeleton22, batch_rigid_transform SKIN_NAME = "SMPLX_NEUTRAL.npz" BETA_NAME = "beta.npy" MEAN_HANDS_NAME = "mean_hands.npy" SMPLX_BODY_JOINT_NAME_MAP = { "pelvis": "Pelvis", "left_hip": "L_Hip", "right_hip": "R_Hip", "spine1": "Spine1", "left_knee": "L_Knee", "right_knee": "R_Knee", "spine2": "Spine2", "left_ankle": "L_Ankle", "right_ankle": "R_Ankle", "spine3": "Spine3", "left_foot": "L_Foot", "right_foot": "R_Foot", "neck": "Neck", "left_collar": "L_Collar", "right_collar": "R_Collar", "head": "Head", "left_shoulder": "L_Shoulder", "right_shoulder": "R_Shoulder", "left_elbow": "L_Elbow", "right_elbow": "R_Elbow", "left_wrist": "L_Wrist", "right_wrist": "R_Wrist", } # SMPL-X hand pose order (15 joints per hand) matching SMPL-X index order. SMPLX_HAND_JOINT_ORDER = [ "Index1", "Index2", "Index3", "Middle1", "Middle2", "Middle3", "Pinky1", "Pinky2", "Pinky3", "Ring1", "Ring2", "Ring3", "Thumb1", "Thumb2", "Thumb3", ] SMPLX_FACE_JOINT_NAMES = ["Jaw", "L_Eye", "R_Eye"] class SMPLXSkin: def __init__( self, skeleton, use_mean_hands=True, ): skel_dir = Path(skeleton.folder) skin_data_path = skel_dir / SKIN_NAME if not skin_data_path.exists(): raise FileNotFoundError( f"Download {SKIN_NAME} from the SMPL-X website and place it at: {skin_data_path}" ) beta_path = skel_dir / BETA_NAME mean_hands_path = skel_dir / MEAN_HANDS_NAME self.skeleton = skeleton assert isinstance(skeleton, SMPLXSkeleton22), "SMPLXSkin only supports SMPLXSkeleton22" assert skeleton.neutral_joints is not None, "SMPLXSkeleton22 must have neutral joints instantiated" device = skeleton.neutral_joints.device with warnings.catch_warnings(): # Ignore legacy object-dtype warning emitted while unpickling old SMPL-X assets. warnings.filterwarnings( "ignore", message=r"dtype\(\): align should be passed as Python or NumPy boolean.*", category=Warning, module=r"numpy\.lib\._format_impl", ) # np.load on .npz is lazy; materialize all fields while filter is active.
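# Added note: the returned NpzFile decodes (and, with allow_pickle=True,
# unpickles) each array only on first access, so copying every field into a
# plain dict below keeps all of that lazy work inside the active warnings
# filter; accessing the fields later, outside the filter, would surface the
# warning again.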
with np.load(skin_data_path, allow_pickle=True) as skin_npz: skin_data = {key: skin_npz[key] for key in skin_npz.files} joint2num = skin_data["joint2num"] if isinstance(joint2num, np.ndarray): joint2num = joint2num.item() self.full_joint_count = int(skin_data["weights"].shape[1]) kintree_table = np.array(skin_data["kintree_table"], dtype=np.int64) parents = kintree_table[0].copy() parents[parents > 1_000_000_000] = -1 self.full_joint_parents = torch.tensor(parents, device=device, dtype=torch.long) root_candidates = np.where(parents == -1)[0] self.full_root_idx = int(root_candidates[0]) if root_candidates.size else 0 self.joint_regressor = torch.tensor( np.array(skin_data["J_regressor"], dtype=np.float32), device=device, dtype=torch.float, ) rig_joint_names = [] rig_joint_indices = [] for joint_name in self.skeleton.bone_order_names: mapped_name = SMPLX_BODY_JOINT_NAME_MAP.get(joint_name) if mapped_name is None or mapped_name not in joint2num: raise ValueError(f"Missing SMPL-X joint mapping for '{joint_name}'") rig_joint_names.append(mapped_name) rig_joint_indices.append(int(joint2num[mapped_name])) self.body_joint_indices = np.array(rig_joint_indices, dtype=np.int64) # Prepare mean hand pose rotations for joints not produced by the model. if use_mean_hands and mean_hands_path is not None and os.path.exists(mean_hands_path): mean_hands = np.array(np.load(mean_hands_path), dtype=np.float32) else: mean_hands = np.zeros(90, dtype=np.float32) if mean_hands.shape[0] != 90: raise ValueError(f"Expected mean_hands shape (90,), got {mean_hands.shape}") mean_hands = mean_hands.reshape(30, 3) mean_hands_rotmats = axis_angle_to_matrix(torch.tensor(mean_hands, device=device, dtype=torch.float)) left_hand_joint_names = [f"L_{name}" for name in SMPLX_HAND_JOINT_ORDER] right_hand_joint_names = [f"R_{name}" for name in SMPLX_HAND_JOINT_ORDER] left_indices = [joint2num[name] for name in left_hand_joint_names] right_indices = [joint2num[name] for name in right_hand_joint_names] self.hand_joint_indices = np.array(left_indices + right_indices, dtype=np.int64) self.mean_hand_rotmats = mean_hands_rotmats face_indices = [joint2num[name] for name in SMPLX_FACE_JOINT_NAMES if name in joint2num] self.face_joint_indices = np.array(face_indices, dtype=np.int64) self.mean_face_rotmats = torch.eye(3, device=device).repeat(len(self.face_joint_indices), 1, 1) # bind_rig_transform: [J, 4, 4] # bind_vertices: [V, 3] # faces: [F, 3] # lbs indices, lbs weights: [V, W] (W = number of joints) v_template = np.array(skin_data["v_template"], dtype=np.float32) faces = np.array(skin_data["f"], dtype=np.int64) weights = np.array(skin_data["weights"], dtype=np.float32) shapedirs = np.array(skin_data["shapedirs"], dtype=np.float32) posedirs = np.array(skin_data["posedirs"], dtype=np.float32) if beta_path is not None and os.path.exists(beta_path): betas = np.array(np.load(beta_path), dtype=np.float32) else: betas = np.zeros(300, dtype=np.float32) num_shape_coeffs = shapedirs.shape[2] # 400 = 300 + 100 (shape + expression) if betas.shape[0] < num_shape_coeffs: betas = np.pad(betas, (0, num_shape_coeffs - betas.shape[0]), mode="constant") elif betas.shape[0] > num_shape_coeffs: betas = betas[:num_shape_coeffs] v_shaped = v_template + np.tensordot(shapedirs, betas, axes=[2, 0]) self.v_shaped = torch.tensor(v_shaped, device=device, dtype=torch.float) self.posedirs = torch.tensor(posedirs, device=device, dtype=torch.float) self.joint_rest = torch.einsum("jv,vc->jc", self.joint_regressor, self.v_shaped) # Align SMPL-X body rest joints to the 
model skeleton rest pose. body_rest = self.skeleton.neutral_joints.to(device=device, dtype=torch.float) if body_rest.shape[0] == self.body_joint_indices.shape[0]: # Treat mismatches as a warning and align to the skeleton pose anyway. max_delta = (self.joint_rest[self.body_joint_indices] - body_rest).abs().max() if max_delta > 1e-6: print( "Warning: SMPL-X rest pose mismatch (max_delta=" f"{max_delta:.2e}); aligning to skeleton neutral joints." ) self.joint_rest[self.body_joint_indices] = body_rest # Renormalize weights to avoid numerical issues. weight_sums = weights.sum(axis=1, keepdims=True) zero_mask = weight_sums[:, 0] < 1e-8 weights = weights / np.clip(weight_sums, 1e-8, None) if np.any(zero_mask): weights[zero_mask, :] = 0.0 weights[zero_mask, self.full_root_idx] = 1.0 joint_indices = np.arange(self.full_joint_count, dtype=np.int64) lbs_indices = np.tile(joint_indices[None, :], (v_template.shape[0], 1)) bind_rig_np = np.zeros((self.full_joint_count, 4, 4), dtype=np.float32) bind_rig_np[:, 3, 3] = 1.0 bind_rig_np[:, :3, :3] = np.eye(3, dtype=np.float32) bind_rig_np[:, :3, 3] = self.joint_rest.detach().cpu().numpy() self.bind_rig_transform = torch.from_numpy(bind_rig_np).to(device=device, dtype=torch.float) bind_rig_inv_np = np.linalg.inv(bind_rig_np) self.bind_rig_transform_inv = torch.from_numpy(bind_rig_inv_np).to(device=device, dtype=torch.float) self.bind_vertices = torch.tensor(v_shaped, device=device, dtype=torch.float) self.faces = torch.tensor(faces, device=device, dtype=torch.long) self.lbs_indices = torch.tensor(lbs_indices, device=device, dtype=torch.long) self.lbs_weights = torch.tensor(weights, device=device, dtype=torch.float) # double check the rig matches expected skeleton order for sname, rname in zip(self.skeleton.bone_order_names, rig_joint_names): mapped_name = SMPLX_BODY_JOINT_NAME_MAP.get(sname) if mapped_name != rname: raise ValueError(f"MISMATCH in skinning rig: expected='{mapped_name}' vs rig='{rname}'") def lbs(self, posed_transform, bind_vertices=None): bind_rig_transform_inv = self.bind_rig_transform_inv if bind_vertices is None: bind_vertices = self.bind_vertices lbs_weights = self.lbs_weights # posed_transform: [B, F, J, 4, 4] or [B, J, 4, 4] or [J, 4, 4] # unsqueeze to match posed_transform batch dims batch_dims = posed_transform.shape[:-3] if bind_vertices.dim() == 2: for _ in batch_dims: bind_vertices = bind_vertices.unsqueeze(0) elif bind_vertices.dim() == 3: if len(batch_dims) == 1: if bind_vertices.shape[0] != batch_dims[0]: bind_vertices = bind_vertices.unsqueeze(0) elif len(batch_dims) > 1: for _ in range(len(batch_dims) - 1): bind_vertices = bind_vertices.unsqueeze(0) for _ in batch_dims: bind_rig_transform_inv = bind_rig_transform_inv.unsqueeze(0) lbs_weights = lbs_weights.unsqueeze(0) # bind_rig_transform_inv: [..., J, 4, 4] # bind_vertices: [..., V, 3] # lbs_weights: [..., V, W] affine_mat = (posed_transform @ bind_rig_transform_inv)[..., :3, :] # [..., J, 3, 4] vs = ( affine_mat[..., self.lbs_indices, :, :] @ torch.concat([bind_vertices, torch.ones_like(bind_vertices[..., 0:1])], dim=-1)[..., None, :, None] ) # [..., V, W, 3, 1] ws = lbs_weights[..., None, None] resv = (vs * ws).sum(dim=-3).squeeze(-1) # [..., V, 3] return resv def skin(self, joint_rotmat, joint_pos, rot_is_global=False): """ joint_rotmat: [T, J, 3, 3] local or global joint rotation matrices joint_pos: [T, J, 3] global joint positions rot_is_global: bool, if True, joint_rotmat is global rotation matrices, otherwise it is local rotation matrices and FK is performed internally 
""" nF, nJ = joint_pos.shape[:2] device = joint_rotmat.device # import ipdb; ipdb.set_trace() if rot_is_global: if joint_rotmat.shape[1] == self.full_joint_count: local_rotmat_full = joint_rotmat.clone() parents = self.full_joint_parents.to(device) parent_rot_mats = local_rotmat_full[:, parents] parent_rot_mats[:, self.full_root_idx] = torch.eye(3, device=device) parent_rot_mats_inv = parent_rot_mats.transpose(2, 3) local_rotmat_full = torch.einsum( "T N m n, T N n o -> T N m o", parent_rot_mats_inv, local_rotmat_full, ) else: local_rotmat = self.skeleton.global_rots_to_local_rots(joint_rotmat) else: local_rotmat = joint_rotmat if rot_is_global and joint_rotmat.shape[1] == self.full_joint_count: full_local = local_rotmat_full else: full_local = torch.eye(3, device=device).reshape(1, 1, 3, 3).repeat(nF, self.full_joint_count, 1, 1) full_local[:, self.body_joint_indices] = local_rotmat if self.mean_hand_rotmats is not None: full_local[:, self.hand_joint_indices] = self.mean_hand_rotmats[None] if self.mean_face_rotmats is not None: full_local[:, self.face_joint_indices] = self.mean_face_rotmats[None] pose_feature = (full_local[:, 1:] - torch.eye(3, device=device)[None, None]).reshape(nF, -1) pose_offsets = torch.einsum("vcp,tp->tvc", self.posedirs, pose_feature) v_posed = self.v_shaped[None] + pose_offsets joints_rest = self.joint_rest[None].repeat(nF, 1, 1) posed_joints, global_joint_rots = batch_rigid_transform( full_local, joints_rest, self.full_joint_parents.to(device), self.full_root_idx, ) # remove the skeleton offset of the root joint root_trans = joint_pos[:, self.skeleton.root_idx] - self.skeleton.neutral_joints[0:1] posed_joints = posed_joints + root_trans[:, None, :] fk_transform = torch.eye(4, device=device)[None, None].repeat(nF, self.full_joint_count, 1, 1) fk_transform[..., :3, :3] = global_joint_rots fk_transform[..., :3, 3] = posed_joints vertices = self.lbs(fk_transform, bind_vertices=v_posed) return vertices ================================================ FILE: kimodo/viz/soma_layer_skin.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """SOMA layer-based skinning for visualization (SOMASkeleton30 / SOMASkeleton77).""" from pathlib import Path import numpy as np import torch from huggingface_hub import snapshot_download from soma import SomaLayer as SOMALayer from kimodo.assets import SOMA_ASSETS_ROOT from kimodo.skeleton import SOMASkeleton30, SOMASkeleton77, global_rots_to_local_rots SOMA_MHR_NEUTRAL_PATH = "somaskel30/soma_base_fit_mhr_params.npz" class SOMASkin: def __init__( self, skeleton, ): self.skeleton = skeleton assert isinstance( skeleton, (SOMASkeleton30, SOMASkeleton77) ), "SOMASkin currently only supports SOMASkeleton30 or SOMASkeleton77" assert skeleton.neutral_joints is not None, "The skeleton must have neutral joints instantiated" # Run the SOMA layer on CPU regardless of the skeleton's device. device = "cpu" self.device = device self._soma_model = SOMALayer( identity_model_type="mhr", device=device, ) self.faces = self._soma_model.faces neutral_mhr_path = Path(skeleton.folder).parent / SOMA_MHR_NEUTRAL_PATH neutral_mhr = np.load(neutral_mhr_path) # one time call to prepare the identity self.soma_identity = torch.from_numpy(neutral_mhr["identity_params"]) self.scale_params = torch.from_numpy(neutral_mhr["scale_params"]) self._soma_model.prepare_identity(self.soma_identity.to(device), scale_params=self.scale_params.to(device)) # dummy output to get bind_vertices transl = torch.zeros(1, 3, device=device) self._full_skeleton = SOMASkeleton77() self.skel_slice = self.skeleton.get_skel_slice(self._full_skeleton) self.bind_vertices = self.soma_model_pose( self._full_skeleton.relaxed_hands_rest_pose[None], transl=transl, pose2rot=False, )["vertices"][0] def soma_model_pose(self, *args, **kwargs): with torch.inference_mode(): return self._soma_model.pose(*args, **kwargs) def skin(self, joint_rotmat, joint_pos, rot_is_global=False): """ joint_rotmat: [T, J, 3, 3] local or global joint rotation matrices joint_pos: [T, J, 3] global joint positions rot_is_global: bool, if True, joint_rotmat is global rotation matrices, otherwise it is local rotation matrices and FK is performed internally """ nF, nJ = joint_pos.shape[:2] if rot_is_global: local_joint_rots_mats_subset = global_rots_to_local_rots(joint_rotmat, self.skeleton) else: local_joint_rots_mats_subset = joint_rotmat if nJ != self._full_skeleton.nbjoints: local_joint_rots_mats = self.skeleton.to_SOMASkeleton77(local_joint_rots_mats_subset) else: local_joint_rots_mats = local_joint_rots_mats_subset # remove the skeleton offset of the root joint transl = joint_pos[:, self.skeleton.root_idx] - self.skeleton.neutral_joints[0:1] output = self.soma_model_pose( local_joint_rots_mats.to(device=self.device, dtype=torch.float32), transl=transl.to(device=self.device, dtype=torch.float32), pose2rot=False, ) return output["vertices"] ================================================ FILE: kimodo/viz/soma_skin.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 """SOMA skeleton skinning for visualization (SOMASkeleton30 / SOMASkeleton77).""" from pathlib import Path import numpy as np import torch from kimodo.skeleton import ( SOMASkeleton30, SOMASkeleton77, batch_rigid_transform, global_rots_to_local_rots, ) # Skin for SOMASkeleton77 SKEL_PATH = "somaskel77" SKIN_NAME = "skin_standard.npz" class SOMASkin: def __init__(self, skeleton): skel_path = Path(skeleton.folder).parent / SKEL_PATH skin_data_path = skel_path / SKIN_NAME self.skeleton_input = skeleton assert isinstance( skeleton, (SOMASkeleton30, SOMASkeleton77) ), "SOMASkin currently only supports SOMASkeleton30 or SOMASkeleton77" assert skeleton.neutral_joints is not None, "The skeleton must have neutral joints instantiated" device = skeleton.neutral_joints.device # the skin is always the 77-joint skeleton # if user is using the 30-joint skeleton, we will pad it when skinning is called self.skeleton_skin = SOMASkeleton77(skel_path).to(device) # bind_rig_transform: [R, 4, 4] # bind_vertices: [V, 3] # faces: [F, 3] # lbs indices, lbs weights: [V, W] (W = max number of joints a vertex is skinned to; in our case W=5) skin_data = np.load(skin_data_path) bind_rig_np = np.array(skin_data["bind_rig_transform"], dtype=np.float32) self.bind_rig_transform = torch.from_numpy(bind_rig_np).to(device=device, dtype=torch.float) # Precompute the inverse in numpy to avoid torch lazy evaluation issues bind_rig_inv_np = np.linalg.inv(bind_rig_np) self.bind_rig_transform_inv = torch.from_numpy(bind_rig_inv_np).to(device=device, dtype=torch.float) self.bind_vertices = torch.tensor(skin_data["bind_vertices"], device=device, dtype=torch.float) self.faces = torch.tensor(skin_data["faces"], device=device, dtype=torch.long) self.lbs_indices = torch.tensor(skin_data["lbs_indices"], device=device, dtype=torch.long) self.lbs_weights = torch.tensor(skin_data["lbs_weights"], device=device, dtype=torch.float) # double check the rig matches expected skeleton rig_joint_names = list(skin_data["rig_joint_names"]) # list(str) : [R] for sname, rname in zip(self.skeleton_skin.bone_order_names, rig_joint_names): if sname != rname: raise ValueError(f"MISMATCH in skinning rig: expected='{sname}' vs rig='{rname}'") def lbs(self, posed_transform): bind_rig_transform_inv = self.bind_rig_transform_inv bind_vertices = self.bind_vertices lbs_weights = self.lbs_weights # posed_transform: [B, F, J, 4, 4] or [B, J, 4, 4] or [J, 4, 4] # unsqueeze to match posed_transform dim for _ in range(posed_transform.dim() - 3): bind_rig_transform_inv = bind_rig_transform_inv.unsqueeze(0) bind_vertices = bind_vertices.unsqueeze(0) lbs_weights = lbs_weights.unsqueeze(0) # bind_rig_transform_inv: [..., R, 4, 4] # bind_vertices: [..., V, 3] # lbs_weights: [..., V, W] affine_mat = (posed_transform @ bind_rig_transform_inv)[..., :3, :] # [..., J, 3, 4] vs = ( affine_mat[..., self.lbs_indices, :, :] @ torch.concat([bind_vertices, torch.ones_like(bind_vertices[..., 0:1])], dim=-1)[..., None, :, None] ) # [..., V, W, 3, 1] ws = lbs_weights[..., None, None] resv = (vs * ws).sum(dim=-3).squeeze(-1) # [..., V, 3] return resv def skin(self, joint_rotmat, joint_pos, rot_is_global=False): """ joint_rotmat: [T, J, 3, 3] local or global joint rotation matrices joint_pos: [T, J, 3] global joint positions rot_is_global: bool, if True, joint_rotmat is global rotation matrices, otherwise it is local rotation matrices and FK is performed internally """ nF, nJ = joint_pos.shape[:2] device = joint_rotmat.device if nJ !=
self.skeleton_skin.nbjoints: assert nJ == 30, "SOMASkin currently only supports 30-joint or 77-joint skeletons" # make sure we have local joint rotations if rot_is_global: local_joint_rots_mats_subset = global_rots_to_local_rots(joint_rotmat, self.skeleton_input) else: local_joint_rots_mats_subset = joint_rotmat local_joint_rots_mats = self.skeleton_input.to_SOMASkeleton77(local_joint_rots_mats_subset) # FK to get the global joint pos and rot neutral_joints_seq = self.skeleton_skin.neutral_joints[None].repeat((nF, 1, 1)).to(device) new_joint_pos, joint_rotmat = batch_rigid_transform( local_joint_rots_mats, neutral_joints_seq, self.skeleton_skin.joint_parents.to(device), self.skeleton_skin.root_idx, ) joint_pos = new_joint_pos + joint_pos[:, self.skeleton_input.root_idx : self.skeleton_input.root_idx + 1] nJ = self.skeleton_skin.nbjoints rot_is_global = True # prepare full transformation matrices fk_transform = torch.eye(4, device=device)[None, None].repeat(nF, nJ, 1, 1) fk_transform[..., :3, 3] = joint_pos if rot_is_global: fk_transform[..., :3, :3] = joint_rotmat else: neutral_joints_seq = self.skeleton_skin.neutral_joints[None].repeat((nF, 1, 1)).to(device) # FK to get the global rotations _, global_joint_rotmat = batch_rigid_transform( joint_rotmat, neutral_joints_seq, self.skeleton_skin.joint_parents.to(device), self.skeleton_skin.root_idx, ) fk_transform[..., :3, :3] = global_joint_rotmat vertices = self.lbs(fk_transform) return vertices ================================================ FILE: kimodo/viz/viser_utils.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Viser-based 3D viz: re-exports from viz submodules for backward compatibility.""" import os from .constraint_ui import ( ConstraintSet, EEJointsKeyframeSet, FullbodyKeyframeSet, RootKeyframe2DSet, build_constraint_set_table_markdown, update_interval, ) from .gui import GuiElements from .playback import CharacterMotion from .scene import ( DARK_THEME, LIGHT_THEME, SKIN_CACHE, Character, SkeletonMesh, WaypointMesh, ) def load_example_cases(examples_base_dir): """List subdirectories of examples_base_dir as a name -> path dict.""" example_dirs = os.listdir(examples_base_dir) example_names = sorted([d for d in example_dirs if os.path.isdir(os.path.join(examples_base_dir, d))]) return {name: os.path.join(examples_base_dir, name) for name in example_names} __all__ = [ "Character", "CharacterMotion", "ConstraintSet", "DARK_THEME", "EEJointsKeyframeSet", "FullbodyKeyframeSet", "GuiElements", "LIGHT_THEME", "RootKeyframe2DSet", "SKIN_CACHE", "SkeletonMesh", "WaypointMesh", "build_constraint_set_table_markdown", "load_example_cases", "update_interval", ] ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" [project] name = "kimodo" version = "1.0.0" description = "Kimodo motion generation model" readme = "README.md" requires-python = ">=3.8" license = {text = "Apache-2.0"} dependencies = [ "hydra-core>=1.3", "omegaconf>=2.3", "numpy>=1.23", "scipy>=1.10", "transformers==5.1.0", "urllib3>=2.6.3", "boto3", "peft>=0.18", "einops>=0.7", "tqdm>=4.0", "packaging>=21.0", "pydantic>=2.0", "filelock>=3.20.3", "gradio>=6.8.0", "gradio_client>=1.0", "trimesh>=3.21.7", "scenepic>=1.1.0", "pillow>=9.0", "av>=16.1.0", "bvhio", ] [project.optional-dependencies] 
demo = [ "viser @ git+https://github.com/nv-tlabs/kimodo-viser.git", ] soma = [ "py-soma-x @ git+https://github.com/NVlabs/SOMA-X.git" ] all = [ "viser @ git+https://github.com/nv-tlabs/kimodo-viser.git", "py-soma-x @ git+https://github.com/NVlabs/SOMA-X.git" ] [project.scripts] kimodo_gen = "kimodo.scripts.generate:main" kimodo_demo = "kimodo.demo:main" kimodo_textencoder = "kimodo.scripts.run_text_encoder_server:main" kimodo_convert = "kimodo.scripts.motion_convert:main" [tool.setuptools] include-package-data = true zip-safe = false [tool.setuptools.package-data] kimodo = ["assets/**/*"] [tool.flake8] max-line-length = 120 [tool.ruff] extend-select = ["I001"] # Enable import sorting line-length = 120 [tool.ruff.lint.isort] known-first-party = ["kimodo"] known-third-party = ["torch", "numpy", "pytorch_lightning", "wandb", "tqdm"] force-sort-within-sections = false ================================================ FILE: setup.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import os import shutil import subprocess import sys from pathlib import Path from setuptools import Extension, find_packages, setup from setuptools.command.build_ext import build_ext class CMakeExtension(Extension): def __init__(self, name, sourcedir=""): super().__init__(name, sources=[]) self.sourcedir = os.path.abspath(sourcedir) class CMakeBuild(build_ext): def run(self): try: subprocess.check_output(["cmake", "--version"]) except OSError as exc: raise RuntimeError("CMake must be installed to build this package") from exc for ext in self.extensions: self.build_extension(ext) def build_extension(self, ext): extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) cmake_args = [ f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}", f"-DPYTHON_EXECUTABLE={sys.executable}", ] cfg = "Debug" if self.debug else "Release" build_args = ["--config", cfg] cmake_args.append(f"-DCMAKE_BUILD_TYPE={cfg}") use_mingw = False mingw_bin = None if sys.platform == "win32": generator = os.environ.get("CMAKE_GENERATOR", "") if generator: cmake_args = ["-G", generator] + cmake_args if "mingw" in generator.lower(): use_mingw = True else: cmake_args.append(f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}") else: try: subprocess.check_output(["g++", "--version"], stderr=subprocess.STDOUT) use_mingw = True cmake_args = ["-G", "MinGW Makefiles"] + cmake_args build_args = [] except (OSError, subprocess.CalledProcessError): cmake_args.append(f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}") if use_mingw: gxx_path = shutil.which("g++") if gxx_path: mingw_bin = Path(gxx_path).parent else: build_args += ["--", "-j4"] env = os.environ.copy() env["CXXFLAGS"] = f'{env.get("CXXFLAGS", "")} -DVERSION_INFO=\\"{self.distribution.get_version()}\\"' if not os.path.exists(self.build_temp): os.makedirs(self.build_temp) subprocess.check_call(["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=self.build_temp) if use_mingw and mingw_bin is not None: runtime_libs = [ "libstdc++-6.dll", "libgcc_s_seh-1.dll", "libwinpthread-1.dll", ] extdir_path = Path(extdir) extdir_path.mkdir(parents=True, exist_ok=True) for lib_name in runtime_libs: src_path = mingw_bin / lib_name if src_path.exists(): shutil.copy2(src_path, extdir_path / lib_name) else: self.announce( f"Warning: Expected MinGW runtime DLL '{lib_name}' not found 
next to g++ (looked in {mingw_bin}). " "The built extension may fail to import if the DLL is not on PATH.", level=3, ) kimodo_packages = find_packages(include=["kimodo", "kimodo.*"]) # When set (e.g. in Docker), do not bundle motion_correction here; it is installed # separately (e.g. from docker_requirements.txt as ./MotionCorrection) non-editable. skip_motion_correction = os.environ.get("SKIP_MOTION_CORRECTION_IN_SETUP", "").strip().lower() in ("1", "true", "yes") if skip_motion_correction: packages = kimodo_packages package_dir = {} ext_modules = [] cmdclass = {} else: packages = kimodo_packages + ["motion_correction"] package_dir = {"motion_correction": "MotionCorrection/python/motion_correction"} ext_modules = [CMakeExtension("motion_correction._motion_correction", "MotionCorrection")] cmdclass = {"build_ext": CMakeBuild} setup( packages=packages, package_dir=package_dir, ext_modules=ext_modules, cmdclass=cmdclass, )
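# Added usage note (illustrative invocations): the toggle above accepts "1",
# "true", or "yes" (case-insensitive), e.g.
#   SKIP_MOTION_CORRECTION_IN_SETUP=1 pip install .   # kimodo only; skip the CMake extension
#   pip install .                                     # also builds and bundles motion_correction
# The first form matches the Docker flow, where MotionCorrection is installed
# separately from docker_requirements.txt.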