Repository: nv-tlabs/kimodo Branch: main Commit: 54257dd8ff18 Files: 320 Total size: 1.6 MB Directory structure: gitextract__xaaoo6k/ ├── .gitignore ├── .pre-commit-config.yaml ├── ATTRIBUTIONS.MD ├── CHANGELOG.md ├── CONTRIBUTING.MD ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── MotionCorrection/ │ ├── .gitignore │ ├── CMakeLists.txt │ ├── MANIFEST.in │ ├── README.md │ ├── python/ │ │ └── motion_correction/ │ │ ├── __init__.py │ │ └── motion_postprocess.py │ ├── run_test.py │ ├── setup.py │ └── src/ │ └── cpp/ │ ├── AnimProcessing/ │ │ ├── InverseKinematics.cpp │ │ ├── InverseKinematics.h │ │ ├── TrajectoryCorrector.cpp │ │ ├── TrajectoryCorrector.h │ │ ├── Utility.cpp │ │ └── Utility.h │ ├── BindingsPython.cpp │ ├── Compiler.h │ ├── Debug.h │ ├── Math/ │ │ ├── Constants.h │ │ ├── Matrix.cpp │ │ ├── Matrix.h │ │ ├── Matrix.inl │ │ ├── Quaternion.cpp │ │ ├── Quaternion.h │ │ ├── Quaternion.inl │ │ ├── SIMD.h │ │ ├── Scalar.h │ │ ├── Transform.cpp │ │ ├── Transform.h │ │ ├── Transform.inl │ │ ├── Types.cpp │ │ ├── Types.h │ │ ├── Vector.cpp │ │ ├── Vector.h │ │ └── Vector.inl │ └── Platform.h ├── README.md ├── benchmark/ │ ├── create_benchmark.py │ ├── embed_folder.py │ ├── evaluate_folder.py │ ├── generate_eval.py │ └── parse_folder.py ├── docker-compose.yaml ├── docker_requirements.in ├── docker_requirements.txt ├── docs/ │ ├── .gitattributes │ ├── Makefile │ ├── README.md │ ├── make.bat │ ├── requirements.txt │ └── source/ │ ├── _static/ │ │ └── custom.css │ ├── _templates/ │ │ └── apidoc/ │ │ ├── module.rst.jinja │ │ └── package.rst.jinja │ ├── api_reference/ │ │ ├── _generated/ │ │ │ ├── kimodo.demo.rst │ │ │ ├── kimodo.exports.rst │ │ │ ├── kimodo.metrics.rst │ │ │ ├── kimodo.model.llm2vec.models.rst │ │ │ ├── kimodo.model.llm2vec.rst │ │ │ ├── kimodo.model.rst │ │ │ ├── kimodo.motion_rep.reps.rst │ │ │ ├── kimodo.motion_rep.rst │ │ │ ├── kimodo.rst │ │ │ ├── kimodo.scripts.rst │ │ │ ├── kimodo.skeleton.rst │ │ │ ├── kimodo.viz.rst │ │ │ └── modules.rst │ │ ├── constraints.rst │ │ ├── exports.rst │ │ ├── index.rst │ │ ├── model.rst │ │ ├── motion_rep.rst │ │ ├── post-processing.rst │ │ ├── utilities.rst │ │ └── viz.rst │ ├── benchmark/ │ │ ├── introduction.md │ │ ├── metrics.md │ │ ├── pipeline.md │ │ └── results.md │ ├── conf.py │ ├── getting_started/ │ │ ├── installation.md │ │ ├── installation_docker.md │ │ ├── installation_smpl.md │ │ ├── installation_virtual_env.md │ │ └── quick_start.md │ ├── index.md │ ├── interactive_demo/ │ │ ├── constraints.md │ │ ├── examples.md │ │ ├── export_results.md │ │ ├── generation.md │ │ ├── index.md │ │ ├── launching.md │ │ ├── model_selection.md │ │ └── ui_overview.md │ ├── key_concepts/ │ │ ├── constraints.md │ │ ├── limitations.md │ │ ├── model.md │ │ ├── motion_representation.md │ │ └── skeleton.md │ ├── project_info.md │ ├── project_structure.md │ └── user_guide/ │ ├── cli.md │ ├── configuration.md │ ├── constraints.md │ ├── motion_convert.md │ ├── output_formats.md │ └── seed_dataset.md ├── kimodo/ │ ├── __init__.py │ ├── assets/ │ │ ├── demo/ │ │ │ └── examples/ │ │ │ ├── kimodo-g1-rp/ │ │ │ │ ├── 01_single_text_prompt/ │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ ├── 02_multi_text_ee_constraint/ │ │ │ │ │ ├── constraints.json │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ ├── 03_full_body_keyframes/ │ │ │ │ │ ├── constraints.json │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ ├── 04_ee_constraint/ │ │ │ │ │ ├── constraints.json │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ ├── 05_root_path/ │ │ │ 
│ │ ├── constraints.json │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ ├── 06_root_waypoints/ │ │ │ │ │ ├── constraints.json │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ ├── 07_text_terrain/ │ │ │ │ │ ├── meta.json │ │ │ │ │ └── motion.npz │ │ │ │ └── 08_text_object/ │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ └── kimodo-soma-rp/ │ │ │ ├── 01_single_text_prompt/ │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ ├── 02_multi_text_prompt/ │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ ├── 03_full_body_keyframes/ │ │ │ │ ├── constraints.json │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ ├── 04_ee_constraint/ │ │ │ │ ├── constraints.json │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ ├── 05_root_path/ │ │ │ │ ├── constraints.json │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ ├── 06_root_waypoints/ │ │ │ │ ├── constraints.json │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ ├── 07_mixed_constraints/ │ │ │ │ ├── constraints.json │ │ │ │ ├── meta.json │ │ │ │ └── motion.npz │ │ │ └── 08_stylized_text/ │ │ │ ├── meta.json │ │ │ └── motion.npz │ │ └── skeletons/ │ │ ├── g1skel34/ │ │ │ ├── joints.p │ │ │ ├── meshes/ │ │ │ │ └── g1/ │ │ │ │ ├── head_link.STL │ │ │ │ ├── left_ankle_pitch_link.STL │ │ │ │ ├── left_ankle_roll_link.STL │ │ │ │ ├── left_elbow_link.STL │ │ │ │ ├── left_hand_index_0_link.STL │ │ │ │ ├── left_hand_index_1_link.STL │ │ │ │ ├── left_hand_middle_0_link.STL │ │ │ │ ├── left_hand_middle_1_link.STL │ │ │ │ ├── left_hand_palm_link.STL │ │ │ │ ├── left_hand_thumb_0_link.STL │ │ │ │ ├── left_hand_thumb_1_link.STL │ │ │ │ ├── left_hand_thumb_2_link.STL │ │ │ │ ├── left_hip_pitch_link.STL │ │ │ │ ├── left_hip_roll_link.STL │ │ │ │ ├── left_hip_yaw_link.STL │ │ │ │ ├── left_knee_link.STL │ │ │ │ ├── left_rubber_hand.STL │ │ │ │ ├── left_shoulder_pitch_link.STL │ │ │ │ ├── left_shoulder_roll_link.STL │ │ │ │ ├── left_shoulder_yaw_link.STL │ │ │ │ ├── left_wrist_pitch_link.STL │ │ │ │ ├── left_wrist_roll_link.STL │ │ │ │ ├── left_wrist_roll_rubber_hand.STL │ │ │ │ ├── left_wrist_yaw_link.STL │ │ │ │ ├── logo_link.STL │ │ │ │ ├── pelvis.STL │ │ │ │ ├── pelvis_contour_link.STL │ │ │ │ ├── right_ankle_pitch_link.STL │ │ │ │ ├── right_ankle_roll_link.STL │ │ │ │ ├── right_elbow_link.STL │ │ │ │ ├── right_hand_index_0_link.STL │ │ │ │ ├── right_hand_index_1_link.STL │ │ │ │ ├── right_hand_middle_0_link.STL │ │ │ │ ├── right_hand_middle_1_link.STL │ │ │ │ ├── right_hand_palm_link.STL │ │ │ │ ├── right_hand_thumb_0_link.STL │ │ │ │ ├── right_hand_thumb_1_link.STL │ │ │ │ ├── right_hand_thumb_2_link.STL │ │ │ │ ├── right_hip_pitch_link.STL │ │ │ │ ├── right_hip_roll_link.STL │ │ │ │ ├── right_hip_yaw_link.STL │ │ │ │ ├── right_knee_link.STL │ │ │ │ ├── right_rubber_hand.STL │ │ │ │ ├── right_shoulder_pitch_link.STL │ │ │ │ ├── right_shoulder_roll_link.STL │ │ │ │ ├── right_shoulder_yaw_link.STL │ │ │ │ ├── right_wrist_pitch_link.STL │ │ │ │ ├── right_wrist_roll_link.STL │ │ │ │ ├── right_wrist_roll_rubber_hand.STL │ │ │ │ ├── right_wrist_yaw_link.STL │ │ │ │ ├── torso_constraint_L_link.STL │ │ │ │ ├── torso_constraint_L_rod_link.STL │ │ │ │ ├── torso_constraint_R_link.STL │ │ │ │ ├── torso_constraint_R_rod_link.STL │ │ │ │ ├── torso_link.STL │ │ │ │ ├── torso_link_23dof_rev_1_0.STL │ │ │ │ ├── torso_link_rev_1_0.STL │ │ │ │ ├── waist_constraint_L.STL │ │ │ │ ├── waist_constraint_R.STL │ │ │ │ ├── waist_roll_link.STL │ │ │ │ ├── waist_roll_link_rev_1_0.STL │ │ │ │ ├── waist_support_link.STL │ │ │ │ ├── waist_yaw_link.STL │ │ │ │ └── 
waist_yaw_link_rev_1_0.STL │ │ │ ├── rest_pose_local_rot.p │ │ │ └── xml/ │ │ │ └── g1.xml │ │ ├── smplx22/ │ │ │ ├── beta.npy │ │ │ ├── joints.p │ │ │ └── mean_hands.npy │ │ ├── somaskel30/ │ │ │ ├── joints.p │ │ │ └── soma_base_fit_mhr_params.npz │ │ └── somaskel77/ │ │ ├── bvh_joints.p │ │ ├── joints.p │ │ ├── relaxed_hands_rest_pose.npy │ │ ├── skin_standard.npz │ │ ├── somaskel77_standard_tpose.bvh │ │ └── standard_t_pose_global_offsets_rots.p │ ├── assets.py │ ├── constraints.py │ ├── demo/ │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── app.py │ │ ├── config.py │ │ ├── embedding_cache.py │ │ ├── generation.py │ │ ├── queue_manager.py │ │ ├── state.py │ │ └── ui.py │ ├── exports/ │ │ ├── __init__.py │ │ ├── bvh.py │ │ ├── motion_convert_lib.py │ │ ├── motion_formats.py │ │ ├── motion_io.py │ │ ├── mujoco.py │ │ └── smplx.py │ ├── geometry.py │ ├── meta.py │ ├── metrics/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── constraints.py │ │ ├── foot_skate.py │ │ └── tmr.py │ ├── model/ │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── cfg.py │ │ ├── common.py │ │ ├── diffusion.py │ │ ├── kimodo_model.py │ │ ├── llm2vec/ │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── llm2vec.py │ │ │ ├── llm2vec_wrapper.py │ │ │ └── models/ │ │ │ ├── __init__.py │ │ │ ├── attn_mask_utils.py │ │ │ ├── bidirectional_llama.py │ │ │ └── utils.py │ │ ├── load_model.py │ │ ├── loading.py │ │ ├── registry.py │ │ ├── text_encoder_api.py │ │ ├── tmr.py │ │ └── twostage_denoiser.py │ ├── motion_rep/ │ │ ├── __init__.py │ │ ├── conditioning.py │ │ ├── feature_utils.py │ │ ├── feet.py │ │ ├── reps/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── kimodo_motionrep.py │ │ │ └── tmr_motionrep.py │ │ ├── smooth_root.py │ │ └── stats.py │ ├── postprocess.py │ ├── sanitize.py │ ├── scripts/ │ │ ├── __init__.py │ │ ├── docker-entrypoint.sh │ │ ├── generate.py │ │ ├── gradio_theme.py │ │ ├── lock_requirements.py │ │ ├── motion_convert.py │ │ ├── mujoco_load.py │ │ └── run_text_encoder_server.py │ ├── skeleton/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bvh.py │ │ ├── definitions.py │ │ ├── kinematics.py │ │ ├── registry.py │ │ └── transforms.py │ ├── tools.py │ └── viz/ │ ├── __init__.py │ ├── constraint_ui.py │ ├── coords.py │ ├── g1_rig.py │ ├── gui.py │ ├── playback.py │ ├── scene.py │ ├── smplx_skin.py │ ├── soma_layer_skin.py │ ├── soma_skin.py │ └── viser_utils.py ├── pyproject.toml └── setup.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # debugging files debug/ SMPLX_NEUTRAL.npz # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so datasets # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# vscode
.vscode
*.code-workspace
/pyrightconfig.json

wandb/

# others
out
tmr_out
.ruff_cache
outputs
/debug
/batch*.sh
checkpoints/**/test/*
nohup.out
*.swp
*.swo
*.txt~*
*.un~
*~
train_done
.aider*
onelogger.err

# deploy files
/helm-library

================================================
FILE: .pre-commit-config.yaml
================================================
repos:
  # code formatting
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.6.4
    hooks:
      - id: ruff
        name: sort imports with ruff
        args: [--select, I, --fix]
      - id: ruff-format
        name: format with ruff
  # docstring formatting
  - repo: https://github.com/PyCQA/docformatter
    rev: v1.7.7
    hooks:
      - id: docformatter
        args:
          [
            --in-place,
            --wrap-summaries=100,
            --wrap-descriptions=100,
            --style=sphinx,
          ]
  # yaml formatting
  - repo: https://github.com/pre-commit/mirrors-prettier
    rev: v3.0.0-alpha.6
    hooks:
      - id: prettier
        types: [yaml]
        exclude: |
          (?x)^(
            environment\.yaml$
            | \.gitlab-ci\.yml$
            | \.k8s/.*\.(ya?ml)$
          )
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.0.1
    hooks:
      - id: trailing-whitespace # Trims trailing whitespace.
      - id: end-of-file-fixer # Makes sure files end in a newline and only a newline.
      - id: check-yaml # Attempts to load all yaml files to verify syntax.
        exclude: |
          (?x)^(
            \.gitlab-ci\.yml$
            | \.k8s/.*\.(ya?ml)$
          )

exclude: "checkpoints/.*"

================================================
FILE: ATTRIBUTIONS.MD
================================================
LLM2Vec
MIT License
https://github.com/McGill-NLP/llm2vec

Copyright (c) 2024 McGill NLP

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Unitree mujoco
BSD 3-Clause License
https://github.com/unitreerobotics/unitree_mujoco/blob/main/LICENSE

Copyright (c) 2016-2024 HangZhou YuShu TECHNOLOGY CO.,LTD. ("Unitree Robotics")
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

================================================
FILE: CHANGELOG.md
================================================
# Changelog

All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [2026-05-03]

### Fixed

- `benchmark/parse_folder.py` now averages each metric only over the testcases that actually report it. Previously, sparse constraint metrics (`constraint_root2d_acc`, `constraint_root2d_err`, `constraint_root2d_err_p95`, `constraint_fullbody_keyframe`, `constraint_end_effector`) were divided by the total motion count of the (split, category), including testcases of other constraint kinds that did not report the metric. This silently scaled values by `motions_with_metric / total_motions` (e.g. `constraint_root2d_acc` displayed as ~0.57 when the true value was ~0.93). Both the printed table and `summary_rows.json` are affected, including the combined constraints row that merges `constraints_withtext` and `constraints_notext`. Text-following metrics, foot-skate/contact metrics, and TMR metrics are unchanged.
- Updated Kimodo benchmark results in the documentation with this fix applied.

## [2026-04-24]

### Added

- Support for `TEXT_ENCODER_DEVICE` environment variable to force LLM2Vec to use the CPU instead of GPU. Setting `TEXT_ENCODER_DEVICE=cpu` reduces VRAM usage to <3 GB with a fairly small speed hit.
- `--save_example_dir` argument to `kimodo_gen` to save outputs to an example directory that can be directly loaded into `kimodo_demo`

### Fixed

- Bug in post-processing that was incorrectly making the smoothed root the target for the root in full-body constraints
- Modified how transitions are handled in multi-prompt generation to improve smoothness

### Removed

- `share_transition` and `percentage_transition_override` options from python API for multi-prompt generation

## [2026-04-13]

### Added

- Option `--bvh_standard_tpose` to use standard T-pose for BVH file saved from `generate.py`
- Option to use standard T-pose for BVH file saved or downloaded from demo
- Option to input/output BVH files that use standard T-pose with `motion_convert.py`
- Added BVH file containing the standard Kimodo T-pose to `kimodo/assets/skeletons/somaskel77/somaskel77_standard_tpose.bvh`
- Updated documentation with these new options

## [2026-04-10]

### Added

- [Kimodo-SOMA-RP-v1.1](https://huggingface.co/nvidia/Kimodo-SOMA-RP-v1.1) and [Kimodo-SOMA-SEED-v1.1](https://huggingface.co/nvidia/Kimodo-SOMA-SEED-v1.1) models and added support in the codebase. If not specified, the latest version of the models will be used automatically with the demo and CLI.
- [Kimodo Motion Generation Benchmark](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark) for standardized evaluation of motion generation models trained on the BONES-SEED dataset.
- Scripts to construct the full benchmark, generate motions for test cases, and compute evaluation metrics.
- Documentation explaining the benchmark and how to use the evaluation pipeline.
- [TMR-SOMA-RP-v1](https://huggingface.co/nvidia/TMR-SOMA-RP-v1) motion-text embedding model to be used for evaluation metrics.
- Added option to load LLM2Vec text encoder in fp32 precision.

### Fixed

- Always use batch size 1 with LLM2Vec to avoid unexpected behavior of different embeddings based on batch size.
- Load LLM2Vec directly onto the GPU, if available.
- Updated documentation on constraints with more details.

## [2026-04-01]

### Fixed

- Fix unnecessary text encoder reload when switching between models in the interactive demo (if not using the text encoder server API).

## [2026-03-31]

### Added

- New `kimodo_convert` CLI tool for converting generated motions between formats (NPZ, BVH, MuJoCo CSV, AMASS NPZ).
- Support for loading and saving BVH, CSV, and NPZ motion files in the interactive demo.

## [2026-03-27]

### Fixed

- Bug fix for foot contact visualization in the interactive demo.
- Patch bug with BVH export for SOMA models.

## [2026-03-19]

### Changed

- **Breaking:** Model inputs/outputs now use the SOMA 77-joint skeleton (`somaskel77`). This affects saved motion formats and constraint files from previous versions.

### Added

- Released timeline annotations for the BONES-SEED dataset on HuggingFace.

## [2026-03-16] - Initial Release

### Added

- Open-source release of Kimodo codebase under Apache-2.0 license.
- Five model variants: Kimodo-SOMA-RP-v1, Kimodo-G1-RP-v1, Kimodo-SOMA-SEED-v1, Kimodo-G1-SEED-v1, Kimodo-SMPLX-RP-v1.
- Command-line interface (`kimodo_gen`) for motion generation with text prompts and kinematic constraints.
- Interactive web-based motion authoring demo (`kimodo_demo`) with timeline editor, constraint tracks, and 3D visualization.
- Support for multiple output formats: default NPZ, MuJoCo qpos CSV (G1), AMASS NPZ (SMPL-X).
- Documentation site with quick start guide, installation instructions, CLI reference, and API docs.
- Compatibility with downstream tools: ProtoMotions (physics-based policy training) and GMR (motion retargeting).

================================================
FILE: CONTRIBUTING.MD
================================================
# How to Contribute

## Code Reviews

All submissions require review. We use GitHub pull requests for this purpose. Consult [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests.

## Signing Your Work

* We require that all contributors "sign-off" on their commits. This certifies that the contribution is your original work, or you have rights to submit it under the same license, or a compatible license.

* Any contribution which contains commits that are not Signed-Off will not be accepted.

* To sign off on a commit you simply use the `--signoff` (or `-s`) option when committing your changes:

```bash
$ git commit -s -m "Add cool feature."
```

This will append the following to your commit message:

```
Signed-off-by: Your Name <your@email.com>
```

* Full text of the DCO:

```
Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
1 Letterman Drive
Suite D4700
San Francisco, CA, 94129

Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
```

```
Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or

(b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or

(c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it.

(d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved.
```

================================================
FILE: Dockerfile
================================================
FROM nvcr.io/nvidia/pytorch:24.10-py3

# Avoid some interactive prompts + make pip quieter/reproducible-ish
ENV DEBIAN_FRONTEND=noninteractive \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Where your code will live inside the container
WORKDIR /workspace

# System deps
RUN apt-get update && apt-get install -y --no-install-recommends \
    git curl ca-certificates \
    cmake build-essential \
    gosu \
    && rm -rf /var/lib/apt/lists/*

# Some base images ship a broken `/usr/local/bin/cmake` shim (from a partial pip install),
# which shadows `/usr/bin/cmake` and breaks builds that invoke `cmake` (e.g. MotionCorrection).
# Prefer the system cmake.
RUN rm -f /usr/local/bin/cmake || true

# Install from docker_requirements.txt: kimodo editable (-e .),
# but MotionCorrection non-editable (./MotionCorrection). The -e . line ensures [project.scripts]
# from pyproject.toml are installed (kimodo_gen, kimodo_demo, kimodo_textencoder).
# SKIP_MOTION_CORRECTION_IN_SETUP=1 so setup.py does not bundle motion_correction; it is
# installed separately from ./MotionCorrection in the requirements file (non-editable).
COPY docker_requirements.txt /workspace/docker_requirements.txt
COPY setup.py /workspace/setup.py
COPY pyproject.toml /workspace/pyproject.toml
COPY kimodo /workspace/kimodo
COPY kimodo-viser /workspace/kimodo-viser
COPY MotionCorrection /workspace/MotionCorrection

RUN --mount=type=cache,target=/root/.cache/pip \
    python -m pip install --upgrade pip \
    && SKIP_MOTION_CORRECTION_IN_SETUP=1 python -m pip install -r docker_requirements.txt

# Use the docker-entrypoint script, to allow the docker to run as the actual user instead of root
COPY kimodo/scripts/docker-entrypoint.sh /usr/local/bin/docker-entrypoint
RUN chmod +x /usr/local/bin/docker-entrypoint

# Default command (change to your entrypoint if you have one)
ENTRYPOINT ["docker-entrypoint"]
CMD ["bash"]

================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.

"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner.
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) 
The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.

Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

================================================
FILE: MANIFEST.in
================================================
include setup.py
recursive-include kimodo/assets *
recursive-include MotionCorrection/src *.cpp *.h *.inl
recursive-include MotionCorrection/python *.py *.dll
include MotionCorrection/CMakeLists.txt
include MotionCorrection/test_example.py

================================================
FILE: MotionCorrection/.gitignore
================================================
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
*.egg
*.egg-info/
dist/
build/
*.whl
.Python
develop-eggs/
.installed.cfg
pip-log.txt
pip-delete-this-directory.txt
.pytest_cache/
.coverage
htmlcov/
.tox/
.venv
venv/
ENV/
env/

# C/C++
*.o
*.obj
*.exe
*.out
*.app
*.dll
*.dylib
*.lib
*.a
*.la
*.lo
*.slo
*.ko
*.elf
*.ilk
*.map
*.exp
*.gch
*.pch
*.idb
*.pdb
*.mod
*.smod
*.lai

# CMake
CMakeCache.txt
CMakeFiles/
CMakeScripts/
cmake_install.cmake
install_manifest.txt
CTestTestfile.cmake
_deps/
cmake-build-*/
CMakeUserPresets.json

# IDE
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store
*.iml
.project
.cproject
.settings/

# Visual Studio
.vs/
*.user
*.suo
*.userosscache
*.sln.docstates
*.VC.db
*.VC.opendb

# Build directories
build/
Build/
out/
dist/
temp/

# Logs
*.log

================================================
FILE: MotionCorrection/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.15)
project(motion_correction)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Find Python
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)

# Find or fetch pybind11
find_package(pybind11 CONFIG QUIET)
if(NOT pybind11_FOUND)
    message(STATUS "pybind11 not found, fetching from GitHub...")
    include(FetchContent)
    FetchContent_Declare(
        pybind11
        GIT_REPOSITORY https://github.com/pybind/pybind11.git
        GIT_TAG v2.11.1
    )
    FetchContent_MakeAvailable(pybind11)
endif()

# Find or fetch Eigen
find_package(Eigen3 3.3 CONFIG QUIET)
if(NOT Eigen3_FOUND)
    message(STATUS "Eigen3 not found, fetching from GitLab...")
    include(FetchContent)
    FetchContent_Declare(
        Eigen
        GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
        GIT_TAG 3.4.0
    )
    set(EIGEN_BUILD_DOC OFF CACHE BOOL "" FORCE)
    set(BUILD_TESTING OFF CACHE BOOL "" FORCE)
    set(EIGEN_BUILD_PKGCONFIG OFF CACHE BOOL "" FORCE)
    FetchContent_MakeAvailable(Eigen)
endif()

# Source files
set(MATH_SOURCES
    src/cpp/Math/Matrix.cpp
    src/cpp/Math/Quaternion.cpp
    src/cpp/Math/Transform.cpp
    src/cpp/Math/Types.cpp
    src/cpp/Math/Vector.cpp
)
set(ANIM_SOURCES
    src/cpp/AnimProcessing/InverseKinematics.cpp
    src/cpp/AnimProcessing/TrajectoryCorrector.cpp
    src/cpp/AnimProcessing/Utility.cpp
)

# Create static library for the core functionality
add_library(motion_correction_cpp_base STATIC ${MATH_SOURCES} ${ANIM_SOURCES})

# Enable Position Independent Code (required for linking into shared library)
set_target_properties(motion_correction_cpp_base PROPERTIES POSITION_INDEPENDENT_CODE ON)

target_include_directories(motion_correction_cpp_base PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp
)

if(TARGET Eigen3::Eigen)
    target_link_libraries(motion_correction_cpp_base PUBLIC Eigen3::Eigen)
else()
    target_link_libraries(motion_correction_cpp_base PUBLIC eigen)
endif()

target_compile_definitions(motion_correction_cpp_base PUBLIC EIGEN_MPL2_ONLY)

# Compiler-specific settings
if(MSVC)
    # MSVC-specific flags
    target_compile_options(motion_correction_cpp_base PRIVATE /W4 /arch:AVX)
else()
    # GCC/Clang flags (also applies to MinGW on Windows)
    # Enable SSE4.1 and AVX instructions for SIMD operations
    target_compile_options(motion_correction_cpp_base PRIVATE -Wall -Wextra -msse4.1 -mavx)
endif()

# Python bindings
pybind11_add_module(_motion_correction src/cpp/BindingsPython.cpp)
target_link_libraries(_motion_correction PRIVATE motion_correction_cpp_base)
target_include_directories(_motion_correction PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp
)

# Install the Python module
install(TARGETS _motion_correction LIBRARY DESTINATION python/motion_correction)
install(FILES python/motion_correction/__init__.py DESTINATION python/motion_correction)
install(FILES python/motion_correction/motion_postprocess.py DESTINATION python/motion_correction)

================================================
FILE: MotionCorrection/MANIFEST.in
================================================
include CMakeLists.txt
include test_example.py
recursive-include src *.cpp *.h *.inl
recursive-include python *.py *.dll

================================================
FILE: MotionCorrection/README.md
================================================
# motion_correction

Standalone `correct_motion` implementation, packaged as a small C++ motion trajectory correction library with Python bindings.

## Installation Guide

### Prerequisites

Ensure you have a C++17-compatible compiler (GCC 7.0+, Clang 5.0+, or MSVC 2017+) and CMake 3.15+. On Windows, install MinGW-w64 or Visual Studio with C++ tools. On Linux, install `build-essential` and `cmake`.

This project will download and install additional third-party open source software projects. Review the license terms of these open source projects before use.

### Build & Install

#### Standard Installation

```bash
pip install .
```

#### Development Installation

```bash
pip install -e .
```

### Verify Installation

```python
import motion_correction
print("Installation successful!")
```

You can also run `python run_test.py` for a simple test.

================================================
FILE: MotionCorrection/python/motion_correction/__init__.py
================================================
from ._motion_correction import *
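A slightly fuller install check than the README's one-liner may be useful here. The sketch below relies only on the package layout shown above: `__init__.py` re-exports the compiled `_motion_correction` module, and `motion_postprocess.py` holds the torch wrapper.

```python
# Smoke test for a motion_correction install (sketch; run after `pip install .`).
import motion_correction

# `from ._motion_correction import *` in __init__.py exposes the C++ entry
# point at package level.
assert hasattr(motion_correction, "correct_motion"), "compiled extension missing"

# The higher-level torch wrapper is a plain-Python submodule.
from motion_correction.motion_postprocess import correct_motion  # noqa: F401

print("motion_correction import OK")
```

For an end-to-end exercise of the call path on a synthetic 12-joint rig, see `run_test.py` further down.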
================================================
FILE: MotionCorrection/python/motion_correction/motion_postprocess.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import os
import pickle

import numpy as np
import torch

import motion_correction


def correct_motion(
    hipTranslations,
    jointRotations,
    contacts,
    hipTranslationsInput,
    rotationsInput,
    constraint_masks,
    contact_threshold,
    root_margin,
    working_rig,
    has_double_ankle_joints=False,
):
    # Flatten the rig description into the parallel arrays the C++ binding expects.
    joint_names = [x.name for x in working_rig]
    joint_parents = [
        joint_names.index(working_rig[i].parent) if working_rig[i].parent in joint_names else -1
        for i in range(len(working_rig))
    ]
    joint_ref_translations = [list(x.t_pose_translation) for x in working_rig]
    joint_ref_rotations = [list(x.t_pose_rotation) for x in working_rig]

    # Each end-effector tag must appear on exactly one joint.
    left_hand_idx = [i for i in range(len(joint_names)) if working_rig[i].retarget_tag == "LeftHand"]
    if len(left_hand_idx) != 1:
        raise RuntimeError("correct_motion: Expected exactly one joint with LeftHand tag")
    left_hand_idx = left_hand_idx[0]

    right_hand_idx = [i for i in range(len(joint_names)) if working_rig[i].retarget_tag == "RightHand"]
    if len(right_hand_idx) != 1:
        raise RuntimeError("correct_motion: Expected exactly one joint with RightHand tag")
    right_hand_idx = right_hand_idx[0]

    left_foot_idx = [i for i in range(len(joint_names)) if working_rig[i].retarget_tag == "LeftFoot"]
    if len(left_foot_idx) != 1:
        raise RuntimeError("correct_motion: Expected exactly one joint with LeftFoot tag")
    left_foot_idx = left_foot_idx[0]

    right_foot_idx = [i for i in range(len(joint_names)) if working_rig[i].retarget_tag == "RightFoot"]
    if len(right_foot_idx) != 1:
        raise RuntimeError("correct_motion: Expected exactly one joint with RightFoot tag")
    right_foot_idx = right_foot_idx[0]

    end_frame = hipTranslations.shape[1]
    default_mask = torch.zeros(hipTranslations.shape[1], dtype=torch.float32)
    root_mask = constraint_masks.get("Root", default_mask)
    full_body_mask = constraint_masks.get("FullBody", default_mask)
    left_hand_mask = constraint_masks.get("LeftHand", default_mask)
    right_hand_mask = constraint_masks.get("RightHand", default_mask)
    left_foot_mask = constraint_masks.get("LeftFoot", default_mask)
    right_foot_mask = constraint_masks.get("RightFoot", default_mask)

    batch_size = hipTranslations.shape[0]
    for b in range(batch_size):
        # The C++ side works on flat float32 buffers and corrects them in place.
        hipTranslationsCorrected = hipTranslations[b, :end_frame].detach().cpu().flatten().numpy().astype(np.float32)
        rotationsCorrected = jointRotations[b, :end_frame].detach().cpu().flatten().numpy().astype(np.float32)
        hipTranslationsInput_flat = hipTranslationsInput.detach().cpu().flatten().numpy().astype(np.float32)
        rotationsInput_flat = rotationsInput.detach().cpu().flatten().numpy().astype(np.float32)
        ctcs = contacts[b].detach().cpu().flatten().numpy().astype(np.float32)

        motion_correction.correct_motion(
            hipTranslationsCorrected,
            rotationsCorrected,
            hipTranslationsInput_flat,
            rotationsInput_flat,
            full_body_mask,
            left_hand_mask,
            right_hand_mask,
            left_foot_mask,
            right_foot_mask,
            root_mask,
            np.array(ctcs, dtype=np.float32),
            joint_parents,
            joint_ref_translations,
            joint_ref_rotations,
            left_hand_idx,
            right_hand_idx,
            left_foot_idx,
            right_foot_idx,
            contact_threshold,
            root_margin,
            has_double_ankle_joints,
        )

        # Copy the corrected buffers back into the original tensors.
        hipTranslations[b, :end_frame] = torch.from_numpy(
            hipTranslationsCorrected.reshape(*hipTranslations[b, :end_frame].shape)
        )
        jointRotations[b, :end_frame] = torch.from_numpy(
            rotationsCorrected.reshape(*jointRotations[b, :end_frame].shape)
        )
================================================
FILE: MotionCorrection/run_test.py
================================================
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import torch

from motion_correction.motion_postprocess import correct_motion


class Joint:
    def __init__(self, name, parent, t_pose_translation, t_pose_rotation, retarget_tag=""):
        self.name = name
        self.parent = parent
        self.t_pose_translation = t_pose_translation
        self.t_pose_rotation = t_pose_rotation
        self.retarget_tag = retarget_tag


def create_test_rig():
    return [
        Joint("Hips", None, [0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0], "Root"),
        Joint("Spine", "Hips", [0.0, 0.1, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("LeftUpLeg", "Hips", [-0.1, -0.05, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("LeftLeg", "LeftUpLeg", [0.0, -0.4, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("LeftFoot", "LeftLeg", [0.0, -0.4, 0.0], [0.0, 0.0, 0.0, 1.0], "LeftFoot"),
        Joint("RightUpLeg", "Hips", [0.1, -0.05, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("RightLeg", "RightUpLeg", [0.0, -0.4, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("RightFoot", "RightLeg", [0.0, -0.4, 0.0], [0.0, 0.0, 0.0, 1.0], "RightFoot"),
        Joint("LeftArm", "Spine", [-0.3, 0.3, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("LeftHand", "LeftArm", [-0.3, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0], "LeftHand"),
        Joint("RightArm", "Spine", [0.3, 0.3, 0.0], [0.0, 0.0, 0.0, 1.0]),
        Joint("RightHand", "RightArm", [0.3, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0], "RightHand"),
    ]


if __name__ == "__main__":
    # Test data
    batch_size, num_frames, num_joints = 1, 60, 12
    hipTranslations = torch.randn(batch_size, num_frames, 3)
    jointRotations = torch.randn(batch_size, num_frames, num_joints, 4)
    jointRotations = jointRotations / jointRotations.norm(dim=-1, keepdim=True)
    contacts = torch.rand(batch_size, num_frames, 4)
    hipTranslationsInput = hipTranslations.clone()
    rotationsInput = jointRotations.clone()
    constraint_masks = {
        "Root": torch.zeros(num_frames),
        "FullBody": torch.zeros(num_frames),
        "LeftHand": torch.zeros(num_frames),
        "RightHand": torch.zeros(num_frames),
        "LeftFoot": torch.zeros(num_frames),
        "RightFoot": torch.zeros(num_frames),
    }
    working_rig = create_test_rig()

    # Run correction
    correct_motion(
        hipTranslations=hipTranslations,
        jointRotations=jointRotations,
        contacts=contacts,
        hipTranslationsInput=hipTranslationsInput,
        rotationsInput=rotationsInput,
        constraint_masks=constraint_masks,
        contact_threshold=0.5,
        root_margin=0.01,
        working_rig=working_rig,
    )
    print("Test completed successfully")
================================================
FILE: MotionCorrection/setup.py
================================================
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Setup script for the correct_motion standalone package."""

import os
import shutil
import subprocess
import sys
from pathlib import Path

from setuptools import Extension, setup
from setuptools.command.build_ext import build_ext


class CMakeExtension(Extension):
    def __init__(self, name, sourcedir=""):
        Extension.__init__(self, name, sources=[])
        self.sourcedir = os.path.abspath(sourcedir)


class CMakeBuild(build_ext):
    def run(self):
        try:
            subprocess.check_output(["cmake", "--version"])
        except OSError:
            raise RuntimeError("CMake must be installed to build this package")
        for ext in self.extensions:
            self.build_extension(ext)

    def build_extension(self, ext):
        # import pdb; pdb.set_trace()  # Debug build process
        extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
        cmake_args = [
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}",
            f"-DPYTHON_EXECUTABLE={sys.executable}",
        ]
        cfg = "Debug" if self.debug else "Release"
        build_args = ["--config", cfg]
        cmake_args += [f"-DCMAKE_BUILD_TYPE={cfg}"]

        use_mingw = False
        mingw_bin = None
        if sys.platform == "win32":
            generator = os.environ.get("CMAKE_GENERATOR", "")
            if generator:
                cmake_args = ["-G", generator] + cmake_args
                if "mingw" in generator.lower():
                    use_mingw = True
                else:
                    cmake_args += [f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"]
            else:
                # Try MinGW Makefiles as default on Windows
                try:
                    subprocess.check_output(["g++", "--version"], stderr=subprocess.STDOUT)
                    use_mingw = True
                    cmake_args = ["-G", "MinGW Makefiles"] + cmake_args
                    build_args = []  # MinGW Makefiles do not accept --config
                except (OSError, subprocess.CalledProcessError):
                    # If g++ is not found, let CMake use its default (Visual Studio)
                    cmake_args += [f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"]
            if use_mingw:
                gxx_path = shutil.which("g++")
                if gxx_path:
                    mingw_bin = Path(gxx_path).parent
        else:
            build_args += ["--", "-j4"]

        env = os.environ.copy()
        env["CXXFLAGS"] = f'{env.get("CXXFLAGS", "")} -DVERSION_INFO=\\"{self.distribution.get_version()}\\"'
        if not os.path.exists(self.build_temp):
            os.makedirs(self.build_temp)
        subprocess.check_call(["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
        subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=self.build_temp)

        if use_mingw and mingw_bin is not None:
            # Ship the MinGW runtime DLLs next to the extension so it can be
            # imported without MinGW's bin directory on PATH.
            runtime_libs = [
                "libstdc++-6.dll",
                "libgcc_s_seh-1.dll",
                "libwinpthread-1.dll",
            ]
            extdir_path = Path(extdir)
            extdir_path.mkdir(parents=True, exist_ok=True)
            for lib_name in runtime_libs:
                src_path = mingw_bin / lib_name
                if src_path.exists():
                    shutil.copy2(src_path, extdir_path / lib_name)
                else:
                    self.announce(
                        f"Warning: Expected MinGW runtime DLL '{lib_name}' not found next to g++ "
                        f"(looked in {mingw_bin}). "
                        "The built extension may fail to import if the DLL is not on PATH.",
                        level=3,
                    )


setup(
    name="motion_correction",
    version="1.0.0",
    author="NVIDIA",
    description="Standalone correct_motion function",
    long_description="",
    packages=["motion_correction"],
    package_dir={"": "python"},
    ext_modules=[CMakeExtension("motion_correction._motion_correction")],
    cmdclass={"build_ext": CMakeBuild},
    zip_safe=False,
    python_requires=">=3.8",
    install_requires=[
        "torch>=1.10.0",
        "numpy>=1.19.0",
        # 'cmake'  # can be installed via pip if the Windows system lacks it,
        # but it must be installed before this build runs, not listed here.
    ],
)
================================================
FILE: MotionCorrection/src/cpp/AnimProcessing/InverseKinematics.cpp
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

#include "InverseKinematics.h"
#include "Math/Scalar.h"

#include <cmath>

using namespace IK;

namespace
{
    float getAngleWithTwoSideVectors(const Math::Vector& vecLeft, const Math::Vector& vecRight)
    {
        auto lNorm = vecLeft.GetNormalized3();
        auto rNorm = vecRight.GetNormalized3();
        float cosine = lNorm.GetDot3(rNorm);
        float sine = lNorm.Cross3(rNorm).GetLength3();
        return atan2f(sine, cosine); // in radians
    }

    float getAngleWithCosineRule(const float lSideLeft, const float lSideRight, const float lSideAcross)
    {
        float val = (lSideRight * lSideRight + lSideLeft * lSideLeft - lSideAcross * lSideAcross) /
                    (2.0f * lSideLeft * lSideRight);
        val = Math::Clamp(val, -1.0f, 1.0f); // numerical stability; also rejects impossible triangles
        return acosf(val); // in radians
    }
}
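// For reference, the identities the two helpers above implement (a restatement
// of the code, not additional behavior):
//
//     theta(u, v) = atan2(|u_hat x v_hat|, u_hat . v_hat)    [getAngleWithTwoSideVectors]
//     gamma       = acos((a^2 + b^2 - c^2) / (2 a b))        [getAngleWithCosineRule]
//
// with a = lSideLeft, b = lSideRight, c = lSideAcross; gamma is the interior
// angle between the two adjacent sides, and the acos argument is clamped to
// [-1, 1] so slightly inconsistent side lengths cannot produce NaNs. The atan2
// form stays accurate near 0 and pi, where acos of a dot product loses precision.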
void IK::TwoBoneIk(
    Pose& pose,
    const Math::Transform& rootTransform,
    uint32_t cIdx,
    float weight,
    const Math::Vector& target,
    const std::vector<int32_t>& joint_parents_vec,
    const Math::Vector& hintOffset
)
{
    weight = Math::Clamp(weight, 0.0f, 1.0f);
    if (!(weight > 0.0f))
        return;

    // Two bone IK: joints are represented as "a", "b", "c" in the below comments:
    // 1. stage 1: bend joints a and b so that |ac| = |at|, while vec_ac maintains the same direction
    // 2. stage 2: rotate start joint a so that c and t end up in the same place
    //
    //    a                 a                 a
    //    |\                |\                |\
    //    | \               | \               | \
    //    |  \ (stage 1 ->) |  \ (stage 2 ->) |  \
    //    |   b             |   b             |   b
    //    |    \            |   |             |  /
    //    |     \           |   |             | /
    //    t      c          t   c             t(c)
    //
    // (a is the root joint, b is the middle joint and c is the end joint)

    int32_t bIdx = joint_parents_vec[cIdx];
    if (bIdx < 0)
    {
        return;
    }
    int32_t aIdx = joint_parents_vec[bIdx];
    if (aIdx < 0)
    {
        return;
    }

    // Find the parent world transform of joint a:
    Math::Transform aParentWorldTransform = Math::Transform::Identity;
    int32_t idx = joint_parents_vec[aIdx];
    while (idx >= 0)
    {
        aParentWorldTransform = aParentWorldTransform * pose[idx];
        idx = joint_parents_vec[idx];
    }
    aParentWorldTransform = aParentWorldTransform * rootTransform;

    // Compute world space transforms of a, b and c:
    Math::Transform aWorld = pose[aIdx] * aParentWorldTransform;
    Math::Transform bWorld = pose[bIdx] * aWorld;
    Math::Transform cWorld = pose[cIdx] * bWorld;

    auto a = aWorld.GetTranslation();
    auto b = bWorld.GetTranslation();
    auto c = cWorld.GetTranslation();
    auto t = Math::Vector::Lerp(c, target, weight);

    // step 1 (stage 1): extend / contract the joint chain to match the distance
    float eps = 0.0001f; // numerical stability
    float l_ab = (b - a).Length3().GetX();
    float l_bc = (c - b).Length3().GetX();
    float l_at = (a - t).Length3().GetX();
    l_at = Math::Clamp(l_at, eps, (l_ab + l_bc) * 0.999f); // when not reachable, replace with maximum reachable length

    // get the current angles
    float theta_bac_current = getAngleWithTwoSideVectors(a - b, a - c);
    float theta_abc_current = getAngleWithTwoSideVectors(b - a, b - c);

    // get the desired angles
    if (l_ab < eps || l_bc < eps || l_at < eps)
    {
        return; // the length is too small; rejecting potentially numerically unstable requests
    }
    float theta_bac_desired = getAngleWithCosineRule(l_ab, l_at, l_bc);
    float theta_abc_desired = getAngleWithCosineRule(l_ab, l_bc, l_at);

    // in joint[0]'s parent's space
    Math::Vector rotationAxis = Math::Vector::Cross3(c - a, bWorld.TransformPoint(hintOffset) - a);
    float l = rotationAxis.GetLength3();
    if (l == 0)
    {
        rotationAxis = Math::Vector(0, 0, 1);
    }
    else
    {
        rotationAxis /= l;
    }

    // get the rotation with axis in the local space of joint a and joint b
    Math::Vector rotationAxisLocalInBSpace = bWorld.GetRotation().RotateVectorInverse(rotationAxis);
    Math::Transform rotateInB(
        Math::Quaternion(rotationAxisLocalInBSpace, (theta_abc_desired - theta_abc_current)),
        Math::Vector::Zero);
    pose[bIdx] = rotateInB * pose[bIdx];

    Math::Vector rotationAxisLocalInASpace = aWorld.GetRotation().RotateVectorInverse(rotationAxis);
    Math::Transform rotateInA(
        Math::Quaternion(rotationAxisLocalInASpace, (theta_bac_desired - theta_bac_current)),
        Math::Vector::Zero);
    pose[aIdx] = rotateInA * pose[aIdx];

    // recompute a's world space transform as we're going to need it:
    aWorld = pose[aIdx] * aParentWorldTransform;

    // step 2 (stage 2): rotate joint a so that the target and the end joint c match
    auto acLocal = aWorld.GetRotation().RotateVectorInverse(c - a);
    auto atLocal = aWorld.GetRotation().RotateVectorInverse(target - a);
    Math::Transform rotateStageTwo(
        Math::Quaternion::FromRotationBetweenVectors(acLocal, atLocal),
        Math::Vector::Zero
    );
    pose[aIdx] = rotateStageTwo * pose[aIdx];
}

void IK::OneBoneIk(
    Pose& pose,
    const Math::Transform& rootTransform,
    uint32_t bIdx,
    float weight,
    const Math::Vector& target,
    const std::vector<int32_t>& joint_parents_vec
)
{
    weight = Math::Clamp(weight, 0.0f, 1.0f);
    if (!(weight > 0.0f))
        return;

    int32_t aIdx = joint_parents_vec[bIdx];
    if (aIdx < 0)
    {
        return;
    }

    // Find the parent world transform of joint a:
    Math::Transform aParentWorldTransform = Math::Transform::Identity;
    int32_t idx = joint_parents_vec[aIdx];
    while (idx >= 0)
    {
        aParentWorldTransform = aParentWorldTransform * pose[idx];
        idx = joint_parents_vec[idx];
    }
    aParentWorldTransform = aParentWorldTransform * rootTransform;

    // Compute world space transforms of a and b:
    Math::Transform aWorld = pose[aIdx] * aParentWorldTransform;
    Math::Transform bWorld = pose[bIdx] * aWorld;

    auto abLocal = aWorld.GetRotation().RotateVectorInverse(bWorld.GetTranslation() - aWorld.GetTranslation());
    auto atLocal = aWorld.GetRotation().RotateVectorInverse(target - aWorld.GetTranslation());
    auto deltaRLocal = Math::Quaternion::NLerp(
        Math::Quaternion::Identity,
        Math::Quaternion::FromRotationBetweenVectors(abLocal, atLocal),
        weight);
    pose[aIdx].SetRotation(deltaRLocal * pose[aIdx].GetRotation());
}
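The two stages of `TwoBoneIk` are easiest to see in the plane. The sketch below is a standalone NumPy illustration of the same scheme; the function and names are ours for illustration, not the library's API, and the C++ version additionally walks the joint hierarchy, blends by `weight`, and disambiguates the bend plane with `hintOffset`.

```python
# 2D illustration of two-stage two-bone IK (sketch, not the library API).
import numpy as np

def two_bone_ik_2d(a, b, c, t):
    """Return (a, b', c') with the end joint c' moved onto target t, if reachable."""
    l_ab = np.linalg.norm(b - a)
    l_bc = np.linalg.norm(c - b)
    # Stage 1: clamp |at| to the reachable range and choose the elbow angle
    # from the law of cosines so that |ac| == |at|.
    l_at = np.clip(np.linalg.norm(t - a), 1e-4, (l_ab + l_bc) * 0.999)
    cos_abc = (l_ab**2 + l_bc**2 - l_at**2) / (2.0 * l_ab * l_bc)
    theta_abc = np.arccos(np.clip(cos_abc, -1.0, 1.0))  # interior angle at b
    # Lay the chain out along +x with that elbow bend.
    b_new = a + np.array([l_ab, 0.0])
    c_new = b_new + l_bc * np.array([np.cos(np.pi - theta_abc), np.sin(np.pi - theta_abc)])
    # Stage 2: rotate the whole chain about a so that c lands on the ray a -> t.
    ang = np.arctan2(t[1] - a[1], t[0] - a[0]) - np.arctan2(c_new[1] - a[1], c_new[0] - a[0])
    rot = np.array([[np.cos(ang), -np.sin(ang)], [np.sin(ang), np.cos(ang)]])
    return a, a + rot @ (b_new - a), a + rot @ (c_new - a)

a, b, c = np.array([0.0, 0.0]), np.array([0.0, -0.4]), np.array([0.0, -0.8])
t = np.array([0.3, -0.5])
_, b2, c2 = two_bone_ik_2d(a, b, c, t)
print(np.allclose(c2, t))  # True: t is within the chain's reach
```

The C++ implementation performs the corresponding two corrections directly on the joint-local quaternions, rotating about an axis perpendicular to the chain's bend plane.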
================================================
FILE: MotionCorrection/src/cpp/AnimProcessing/TrajectoryCorrector.cpp
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#include "TrajectoryCorrector.h"
#include <algorithm>
#include <cmath>
#include <vector>

static void removeRows(
    Eigen::SparseMatrix<double>& M,
    Eigen::MatrixXd* v,
    int minCoeffs)
{
    Eigen::SparseMatrix<double, Eigen::RowMajor> rowMajorMat = M;
    rowMajorMat.makeCompressed(); // Ensure compressed format
    std::vector<Eigen::Triplet<double>> triplets;
    triplets.reserve(rowMajorMat.nonZeros());
    int newRow = 0;
    for (int i = 0; i < rowMajorMat.outerSize(); ++i)
    {
        // Get nonzero count via outerIndexPtr (compressed format only)
        int nnz = rowMajorMat.outerIndexPtr()[i + 1] - rowMajorMat.outerIndexPtr()[i];
        if (nnz >= minCoeffs)
        {
            // Iterate through nonzeros in this row
            for (Eigen::SparseMatrix<double, Eigen::RowMajor>::InnerIterator it(rowMajorMat, i); it; ++it)
            {
                triplets.emplace_back(newRow, it.col(), it.value());
            }
            if (v)
            {
                v->row(newRow) = v->row(i);
            }
            newRow++;
        }
    }
    M = Eigen::SparseMatrix<double>(newRow, M.cols());
    M.setFromTriplets(triplets.begin(), triplets.end());
    if (v)
    {
        v->conservativeResize(newRow, v->cols());
    }
}

static void multVelWeights(
    Eigen::SparseMatrix<double>& V,
    Eigen::MatrixXd* v_rhs,
    const Eigen::VectorXd& velocityWeights)
{
    Eigen::SparseMatrix<double, Eigen::RowMajor> rowMajorMat = V;
    rowMajorMat.makeCompressed(); // Ensure compressed format
    std::vector<Eigen::Triplet<double>> triplets;
    triplets.reserve(rowMajorMat.nonZeros());
    for (int i = 0; i < rowMajorMat.outerSize(); ++i)
    {
        // Iterate through nonzeros in this row. The weight for a velocity row
        // is looked up from the column of its first nonzero (the earlier of
        // the two frames it differences):
        Eigen::SparseMatrix<double, Eigen::RowMajor>::InnerIterator it(rowMajorMat, i);
        if (!it)
        {
            continue; // empty row: nothing to scale
        }
        double vel_weight = velocityWeights[it.col()];
        for (; it; ++it)
        {
            triplets.emplace_back(i, it.col(), it.value() * vel_weight);
        }
        if (v_rhs)
        {
            (*v_rhs).row(i) = (*v_rhs).row(i) * vel_weight;
        }
    }
    // Write the scaled coefficients back into V (without this the weights
    // would only ever be applied to the right-hand side):
    Eigen::SparseMatrix<double> scaled(V.rows(), V.cols());
    scaled.setFromTriplets(triplets.begin(), triplets.end());
    V = scaled;
}

void TrajectoryCorrector::computeDiffMats(
    Eigen::SparseMatrix<double>& V,
    Eigen::SparseMatrix<double>& A,
    uint32_t N,
    const Eigen::VectorXd& velocityWeights,
    Eigen::MatrixXd* v_rhs,
    Eigen::MatrixXd* a_rhs)
{
    std::vector<Eigen::Triplet<double>> tripletList;

    // Identity matrix:
    tripletList.clear();
    Eigen::SparseMatrix<double> I(N, N);
    for (uint32_t i = 0; i < N; ++i)
    {
        tripletList.emplace_back(i, i, 1);
    }
    I.setFromTriplets(tripletList.begin(), tripletList.end());

    // T is a time translation operator: it gives you the value on the next frame.
    // Leave the last row blank because that's the end of the timeline.
    tripletList.clear();
    Eigen::SparseMatrix<double> T(N, N);
    Eigen::MatrixXd t_rhs;
    for (uint32_t i = 0; i < N - 1; ++i)
    {
        // next frame is i+1
        tripletList.emplace_back(i, i + 1, 1.0);
    }
    T.setFromTriplets(tripletList.begin(), tripletList.end());

    // v = Tx + t_rhs - x;
    // v = (T - I)x + t_rhs;
    V = T - I;
    if (v_rhs)
    {
        *v_rhs = t_rhs;
    }
    removeRows(V, v_rhs, 2);

    // a = -x + 2 (T x + t_rhs) - (T (T x + t_rhs) + t_rhs)
    // a = (-I + 2 T - T^2) x + t_rhs - T t_rhs
    A = 2 * T - I - T * T;
    if (a_rhs)
    {
        *a_rhs = t_rhs - T * t_rhs;
    }
    removeRows(A, a_rhs, 3);

    if (velocityWeights.size() > 0)
    {
        multVelWeights(V, v_rhs, velocityWeights);
    }
}
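// Worked example (comments only): for N = 4 frames, computeDiffMats builds
//   V = T - I, with rows holding fewer than 2 nonzeros removed:
//       [ -1  1  0  0 ]
//       [  0 -1  1  0 ]      (3 velocity rows)
//       [  0  0 -1  1 ]
//   A = 2T - I - T^2, with rows holding fewer than 3 nonzeros removed:
//       [ -1  2 -1  0 ]      (2 acceleration rows)
//       [  0 -1  2 -1 ]
// so V x stacks the per-frame forward differences of the trajectory x,
// and A x stacks its second differences.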
TrajectoryCorrector::TrajectoryCorrector(
    const Eigen::VectorXd& margins,
    float pos_weight,
    float vel_weight,
    float acc_weight,
    const Eigen::VectorXd& velocityWeights,
    uint32_t admm_iters
) :
    m_admm_iters(admm_iters)
{
    // This class is used to modify a trajectory to hit specific values at
    // specific frames, while respecting the following soft constraints:
    //   * Preserve the original positions
    //   * Preserve the original velocities
    //   * Preserve the original accelerations
    // The weights of these soft constraints are specified in "pos_weight" etc.
    // This is posed as a minimization problem:
    //   E(x) = pos_weight * |x - x_orig|^2 + vel_weight * |V x - V x_orig|^2 + acc_weight * |A x - A x_orig|^2
    // where you minimize E(x) subject to specified values at indices where "mask"
    // is equal to 1. V is a matrix that computes the N-1 velocities between frame n-1 and frame n,
    // and A computes the N-2 accelerations associated with frames n-1, n and n+1.
    // In addition to this, there are constraints where the trajectory is allowed to
    // deviate from the target points by a maximum margin. The "margins" input to this
    // constructor specifies what type of constraint is active on a particular frame:
    //   margins[i] < 0  ==> unconstrained
    //   margins[i] == 0 ==> pinned on this frame
    //   margins[i] > 0  ==> can deviate within the margin
    // The optimization problem is solved using ADMM, i.e. following equations
    // 8, 9 and 10 in this paper:
    // https://mattoverby.net/files/admm-pd-overby17.pdf
    uint32_t N = uint32_t(margins.rows());
    for (uint32_t i = 0; i < N; ++i)
    {
        if (margins[i] > 0)
        {
            m_margin_locs.push_back(i);
            m_margin_vals.push_back(margins[i]);
        }
        if (margins[i] == 0)
        {
            m_constrained_locs.push_back(i);
        }
        else
        {
            m_unconstrained_locs.push_back(i);
        }
    }

    Eigen::SparseMatrix<double> V, A;
    computeDiffMats(V, A, N, velocityWeights);

    // build an identity matrix:
    std::vector<Eigen::Triplet<double>> tripletList;
    Eigen::SparseMatrix<double> I(N, N);
    for (uint32_t i = 0; i < N; ++i)
    {
        tripletList.emplace_back(i, i, 1.0f);
    }
    I.setFromTriplets(tripletList.begin(), tripletList.end());

    /*
    self.N = (
        self.pos_weight * torch.diag_embed(torch.full_like(interp_mask, 1))
        + self.vel_weight * torch.matmul(self.V.T, self.V)
        + self.acc_weight * torch.matmul(self.A.T, self.A)
    )
    */
    m_N = pos_weight * I + vel_weight * (V.transpose() * V) + acc_weight * (A.transpose() * A);

    double diagMax = 0;
    for (uint32_t i = 0; i < N; ++i)
    {
        diagMax = std::max(m_N.coeff(i, i), diagMax);
    }
    m_admm_stepsize = 0.5f * sqrtf(float(diagMax));

    /*
    M = (
        self.N
        + self.admm_stepsize * torch.matmul(self.S.T, self.S)
    )
    */
    tripletList.clear();
    Eigen::SparseMatrix<double> M(N, N);
    for (auto i : m_margin_locs)
    {
        tripletList.emplace_back(i, i, m_admm_stepsize);
    }
    M.setFromTriplets(tripletList.begin(), tripletList.end());
    M += m_N;

    /*
    self.lhsmat = torch.matmul(self.U.T, torch.matmul(self.M, self.U))
    self.lhsmat_inv = torch.inverse(self.lhsmat)
    */
    tripletList.clear();
    Eigen::SparseMatrix<double> S(m_unconstrained_locs.size(), N);
    for (uint32_t i = 0; i < m_unconstrained_locs.size(); ++i)
    {
        uint32_t ifull = m_unconstrained_locs[i];
        tripletList.emplace_back(i, ifull, 1.0f);
    }
    S.setFromTriplets(tripletList.begin(), tripletList.end());
    M = S * M * S.transpose();
    if (m_unconstrained_locs.size())
    {
        m_system_lu.compute(M);
    }
}
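// Illustrative sketch (not compiled; not part of the library): constructing
// a corrector for a 5-frame channel where frame 0 is pinned, frame 2 may
// deviate by up to 0.1 units, and the remaining frames are unconstrained.
// The weights here are arbitrary example values:
#if 0
Eigen::VectorXd margins(5);
margins << 0.0, -1.0, 0.1, -1.0, -1.0;
TrajectoryCorrector corrector(margins, /*pos_weight=*/0.001f,
                              /*vel_weight=*/1.0f, /*acc_weight=*/10.0f);
#endif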
void TrajectoryCorrector::Interpolate(
    Eigen::MatrixXd& x,
    const Eigen::MatrixXd& observations,
    const Eigen::MatrixXd& ref_positions
) const
{
    if (m_constrained_locs.empty() && m_margin_locs.empty())
    {
        x = ref_positions;
        return;
    }
    uint32_t numCols = uint32_t(x.cols());
    if (m_margin_locs.empty())
    {
        x_update(x, Eigen::MatrixXd(0, numCols), Eigen::MatrixXd(0, numCols), ref_positions, observations);
    }
    else
    {
        x = ref_positions;
        Eigen::MatrixXd z(m_margin_locs.size(), numCols);
        Eigen::MatrixXd z_t(m_margin_locs.size(), numCols);
        Eigen::MatrixXd u(m_margin_locs.size(), numCols);
        for (uint32_t i = 0; i < m_margin_locs.size(); ++i)
        {
            for (uint32_t j = 0; j < numCols; ++j)
            {
                z_t(i, j) = observations(m_margin_locs[i], j);
                z(i, j) = ref_positions(m_margin_locs[i], j);
                u(i, j) = 0;
            }
        }
        for (uint32_t i = 0; i < m_admm_iters; ++i)
        {
            x_update(x, z, u, ref_positions, observations);
            z_update(z, x, z_t, u);
            u_update(u, x, z);
        }
    }
}
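// Illustrative sketch (not compiled; continues the constructor example
// above): warping a 1-column trajectory so it hits the observations on
// constrained frames while preserving the shape of ref_positions elsewhere:
#if 0
Eigen::MatrixXd ref_positions(5, 1), observations(5, 1), result(5, 1);
// ... fill ref_positions with the original curve and observations with the
// per-frame targets (only rows whose margin is >= 0 are consulted) ...
corrector.Interpolate(result, observations, ref_positions);
#endif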
void TrajectoryCorrector::x_update(
    Eigen::MatrixXd& x,
    const Eigen::MatrixXd& z,
    const Eigen::MatrixXd& u,
    const Eigen::MatrixXd& x_t, // reference positions - defines the original shape of the curve that we want to preserve
    const Eigen::MatrixXd& x_o  // target positions for constraints
) const
{
    uint32_t numCols = uint32_t(x.cols());

    // Here's what we're minimizing with ADMM:
    //   min f(x) + g(z)
    //   s.t. A x + B z = c
    // Make these choices so that z = S x:
    //   A = S, B = -I, c = 0
    //
    // g(z) = infinity when it's too far away from z_target, zero otherwise
    //
    // f(x) penalizes deviations in position, velocity and acceleration
    // from a reference trajectory:
    //
    //   f(x) = 1/2 (
    //       kx |I x - x_t|^2 +
    //       kv |V x - v_t|^2 +
    //       ka |A x - a_t|^2
    //   )
    //
    // It's also infinite when components of x deviate from their target
    // values where they're pinned...
    // Substituting the matrices into the standard ADMM update rules gives us this:
    //   x{n+1} = argmin(f(x) + ρ/2 |S x - z{n} + u{n}|^2)
    //   z{n+1} = argmin(g(z) + ρ/2 |S x{n+1} - z + u{n}|^2)
    //   u{n+1} = u{n} + (S x{n+1} - z{n+1})
    //
    // x update:
    //
    //   x{n+1} = argmin 1/2 (
    //       kx |I x - x_t|^2 +
    //       kv |V x - v_t|^2 +
    //       ka |A x - a_t|^2 +
    //       ρ  |S x - d|^2
    //   )
    //   d = (z{n} - u{n})
    // Rewrite in a friendlier way:
    //   |A x - b|^2 = x^T A^T A x - 2 x^T A^T b + C
    //   1/2 (
    //       kx (x^T x - 2 x^T x_t) +
    //       kv (x^T V^T V x - 2 x^T V^T v_t) +
    //       ka (x^T A^T A x - 2 x^T A^T a_t) +
    //       ρ  (x^T S^T S x - 2 x^T S^T d)
    //   ) + C
    //
    //   1/2 x^T (kx I + kv V^T V + ka A^T A + ρ S^T S) x
    //   - x^T (kx x_t + kv V^T v_t + ka A^T a_t + ρ S^T d)
    //   + C
    //
    // voila:
    //   M = kx I + kv V^T V + ka A^T A + ρ S^T S
    //   r = kx x_t + kv V^T v_t + ka A^T a_t + ρ S^T d
    //   E = 1/2 x^T M x - x^T r + C

    /*
    r = (
        torch.matmul(self.N, x_t - x_o_filtered)
        + self.admm_stepsize * torch.matmul(self.S.T, - u + z)
    )
    */
    Eigen::MatrixXd x_diffs(x_t);
    for (auto i : m_constrained_locs)
    {
        for (uint32_t j = 0; j < numCols; ++j)
        {
            x_diffs(i, j) = x_diffs(i, j) - x_o(i, j);
        }
    }
    Eigen::MatrixXd r = m_N * x_diffs;
    for (uint32_t i = 0; i < m_margin_locs.size(); ++i)
    {
        uint32_t ifull = m_margin_locs[i];
        for (uint32_t j = 0; j < numCols; ++j)
        {
            r(ifull, j) = r(ifull, j) + m_admm_stepsize * (z(i, j) - u(i, j));
        }
    }

    // Solve with respect to pin constraints:
    //   x = U x_r + x_o
    //   E = 1/2 (U x_r + x_o)^T M (U x_r + x_o) - (U x_r + x_o)^T r + C
    //   E = 1/2 (x_r^T U^T + x_o^T) M (U x_r + x_o) - (x_r^T U^T + x_o^T) r + C
    //   E = 1/2 (x_r^T U^T M (U x_r + x_o) + x_o^T M (U x_r + x_o)) - x_r^T U^T r - x_o^T r + C
    //   E = 1/2 (x_r^T U^T M U x_r) + x_r^T U^T (M x_o - r) + C
    // minimized when x_r solves this equation:
    //   U^T M U x_r + U^T (M x_o - r) = 0
    //   x_r = (U^T M U)^-1 U^T (r - M x_o)

    // collapse r down to unconstrained variable set:
    // rhs = torch.matmul(self.U.T, r)
    uint32_t numRows_reduced = uint32_t(m_unconstrained_locs.size());
    Eigen::MatrixXd r_reduced(numRows_reduced, numCols);
    for (uint32_t i = 0; i < numRows_reduced; ++i)
    {
        uint32_t ifull = m_unconstrained_locs[i];
        for (uint32_t j = 0; j < numCols; ++j)
        {
            r_reduced(i, j) = r(ifull, j);
        }
    }

    // solve system:
    // x_r = torch.matmul(self.lhsmat_inv, rhs)
    Eigen::MatrixXd result;
    if (m_unconstrained_locs.size())
    {
        result = m_system_lu.solve(r_reduced);
    }

    // map back to full variable set:
    // return torch.matmul(self.U, x_r) + x_o_filtered
    for (uint32_t i = 0; i < numRows_reduced; ++i)
    {
        uint32_t ifull = m_unconstrained_locs[i];
        for (uint32_t j = 0; j < numCols; ++j)
        {
            x(ifull, j) = result(i, j);
        }
    }
    for (auto i : m_constrained_locs)
    {
        for (uint32_t j = 0; j < numCols; ++j)
        {
            x(i, j) = x_o(i, j);
        }
    }
}
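// The z update below is a projection: g(z) is an indicator function that is
// zero inside a ball of radius margin around the target z_t and infinite
// outside it, so argmin(g(z) + ρ/2 |S x - z + u|^2) is simply S x + u clamped
// back onto that ball. E.g. with z_t = 0, margin = 1 and S x + u = (0, 3),
// the diff has norm 3 > 1 and is rescaled by 1/3, giving z = (0, 1).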
void TrajectoryCorrector::z_update(
    Eigen::MatrixXd& z,
    const Eigen::MatrixXd& x,
    const Eigen::MatrixXd& z_t,
    const Eigen::MatrixXd& u
) const
{
    uint32_t numCols = uint32_t(z.cols());
    for (uint32_t i = 0; i < m_margin_locs.size(); ++i)
    {
        // z_diffs = S x + u - z_t
        uint32_t ifull = m_margin_locs[i];
        for (uint32_t j = 0; j < numCols; ++j)
        {
            z(i, j) = x(ifull, j) + u(i, j) - z_t(i, j);
        }
        // find the norm of the current z diff vector:
        double z_diff_norm = 0.0;
        for (uint32_t j = 0; j < numCols; ++j)
        {
            double z_diff = z(i, j);
            z_diff_norm += z_diff * z_diff;
        }
        z_diff_norm = sqrt(z_diff_norm);
        // if the norm is greater than the margin size, we need to rescale
        // the diff:
        if (z_diff_norm > m_margin_vals[i])
        {
            for (uint32_t j = 0; j < numCols; ++j)
            {
                z(i, j) = z(i, j) * m_margin_vals[i] / z_diff_norm;
            }
        }
        // add the diff back on to the target:
        for (uint32_t j = 0; j < numCols; ++j)
        {
            z(i, j) = z_t(i, j) + z(i, j);
        }
    }
}

void TrajectoryCorrector::u_update(
    Eigen::MatrixXd& u,
    const Eigen::MatrixXd& x,
    const Eigen::MatrixXd& z
) const
{
    uint32_t numCols = uint32_t(z.cols());
    // u += S x - z
    for (uint32_t i = 0; i < m_margin_locs.size(); ++i)
    {
        uint32_t ifull = m_margin_locs[i];
        for (uint32_t j = 0; j < numCols; ++j)
        {
            u(i, j) += x(ifull, j) - z(i, j);
        }
    }
}

================================================
FILE: MotionCorrection/src/cpp/AnimProcessing/TrajectoryCorrector.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

#include <Eigen/Dense>
#include <Eigen/Sparse>
#include <cstdint>
#include <vector>

class TrajectoryCorrector
{
public:
    static void computeDiffMats(
        Eigen::SparseMatrix<double>& V,
        Eigen::SparseMatrix<double>& A,
        uint32_t N,
        const Eigen::VectorXd& velocityWeights = Eigen::VectorXd(),
        Eigen::MatrixXd* v_rhs = nullptr,
        Eigen::MatrixXd* a_rhs = nullptr
    );

    TrajectoryCorrector(
        const Eigen::VectorXd& margins,
        float pos_weight,
        float vel_weight,
        float acc_weight,
        const Eigen::VectorXd& velocityWeights = Eigen::VectorXd(),
        uint32_t admm_iters = 100
    );

    void Interpolate(
        Eigen::MatrixXd& ret,
        const Eigen::MatrixXd& observations,
        const Eigen::MatrixXd& ref_positions
    ) const;

    void x_update(
        Eigen::MatrixXd& x,
        const Eigen::MatrixXd& z,
        const Eigen::MatrixXd& u,
        const Eigen::MatrixXd& x_t,
        const Eigen::MatrixXd& x_o
    ) const;

    void z_update(
        Eigen::MatrixXd& z,
        const Eigen::MatrixXd& x,
        const Eigen::MatrixXd& z_t,
        const Eigen::MatrixXd& u
    ) const;

    void u_update(
        Eigen::MatrixXd& u,
        const Eigen::MatrixXd& x,
        const Eigen::MatrixXd& z
    ) const;

    float admm_stepsize() const { return m_admm_stepsize; }
    const std::vector<uint32_t>& margin_locs() { return m_margin_locs; }

private:
    Eigen::SparseMatrix<double> m_N;
    Eigen::SparseLU<Eigen::SparseMatrix<double>> m_system_lu;
    uint32_t m_admm_iters;
    std::vector<uint32_t> m_margin_locs;
    std::vector<double> m_margin_vals;
    std::vector<uint32_t> m_unconstrained_locs;
    std::vector<uint32_t> m_constrained_locs;
    float m_admm_stepsize;
};

================================================
FILE: MotionCorrection/src/cpp/AnimProcessing/Utility.cpp
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#include "TrajectoryCorrector.h"
#include "InverseKinematics.h"
#include "Utility.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <map>
#include <vector>

using Pose = std::vector<Math::Transform>;

static const float pos_weight = 0.001f;
static const float vel_weight = 1.0f;
static const float acc_weight = 10.0f;

namespace
{
    // Enable with: MOTIONCORRECTION_DEBUG_INTERVALS=1
    // Default: off (no Interval printing).
    bool DebugPrintIntervalsEnabled()
    {
        const char* v = std::getenv("MOTIONCORRECTION_DEBUG_INTERVALS");
        if (v == nullptr || v[0] == '\0')
        {
            return false;
        }
        // Treat "0" as false; any other non-empty value enables.
        return v[0] != '0';
    }
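    // Worked example for the interval helpers below: with contactThreshold = 0.5,
    //   contacts = [0, 1, 1, 1, 0]  and  mask = [0, 0, 1, 0, 0],
    // ComputeContactIntervals first zeroes the contacts on masked frames,
    // giving [0, 1, 0, 1, 0], and then extracts the half-open intervals
    // [1, 2) and [3, 4). Each of these touches the masked frame 2 on exactly
    // one side, so FilterContactIntervals keeps both for two-bone contacts
    // (which only drop intervals masked on both sides) but drops both for
    // one-bone contacts (which drop intervals masked on either side).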
    void FilterContactIntervals(
        std::vector<std::pair<int, int>>& contactIntervals,
        const std::vector<float>& mask,
        bool one_bone_contact = false)
    {
        std::vector<size_t> keepIntervals;
        for (size_t i = 0; i < contactIntervals.size(); ++i)
        {
            const auto& interval = contactIntervals[i];
            bool startConstrained = (interval.first != 0 && mask[interval.first - 1]);
            bool endConstrained = (interval.second != (int)mask.size() && mask[interval.second]);
            if (one_bone_contact)
            {
                if (startConstrained || endConstrained)
                {
                    continue;
                }
            }
            else
            {
                // If both the start and end of the contact interval are masked,
                // there's no way we can correct the contact without popping, so
                // let's filter these out:
                if (startConstrained && endConstrained)
                {
                    continue;
                }
            }
            keepIntervals.push_back(i);
        }
        for (size_t i = 0; i < keepIntervals.size(); ++i)
        {
            contactIntervals[i] = contactIntervals[keepIntervals[i]];
        }
        contactIntervals.resize(keepIntervals.size());
    }

    std::vector<std::pair<int, int>> ComputeContactIntervals(
        const std::vector<float>& contacts,
        const std::vector<float>& mask,
        float contactThreshold)
    {
        // turn off the contacts for all frames that are constrained/masked:
        std::vector<float> contactsNoMask = contacts;
        for (size_t i = 0; i < mask.size(); ++i)
        {
            if (mask[i])
            {
                contactsNoMask[i] = 0;
            }
        }
        // Find intervals that are in contact:
        std::vector<std::pair<int, int>> contactIntervals;
        int start = -1;
        for (int frame = 0; frame < (int)mask.size(); ++frame)
        {
            bool isContact = contactsNoMask[frame] > contactThreshold;
            if (isContact && start == -1)
            {
                start = frame;
            }
            else if (!isContact && start != -1)
            {
                contactIntervals.emplace_back(start, frame);
                start = -1;
            }
        }
        // Close the final interval if needed:
        if (start != -1)
        {
            contactIntervals.emplace_back(start, (int)mask.size());
        }
        return contactIntervals;
    }

    void FindContactPoints(
        std::vector<Math::Vector>& points,
        std::vector<float>& inContact,
        const std::vector<int32_t>& joint_parents_vec,
        int32_t jointIndex,
        const std::vector<Pose>& poses,
        const std::vector<std::pair<int, int>>& contactIntervals,
        const std::vector<float>& mask,
        size_t frameCount,
        float minHeight)
    {
        // Find a representative frame for each interval.
        // If the interval starts after a masked frame, use the start
        // of the interval; if it ends before a mask, use the end;
        // otherwise use the middle frame.
inContact.clear(); inContact.resize(frameCount, 0); points.clear(); points.resize(frameCount); for (size_t i = 0; i < contactIntervals.size(); ++i) { const auto& interval = contactIntervals[i]; int frame = -1; bool startConstrained = (interval.first != 0 && mask[interval.first - 1]); bool endConstrained; endConstrained = (interval.second != mask.size() && mask[interval.second]); // Debug output (opt-in via env var) if (DebugPrintIntervalsEnabled()) { std::cout << "Interval " << i << ": start=" << interval.first << ", end=" << interval.second << ", startConstrained=" << startConstrained << ", endConstrained=" << endConstrained << std::endl; } if(startConstrained) { // If the interval starts on a constraint, use the constrained frame // as a target (doing this modulo mask.size() in case we're looping) frame = interval.first - 1; } else if (endConstrained) { // If the interval ends on a constraint, use the constrained frame // as a target: frame = interval.second; } else { // Otherwise use the midpoint of the interval: frame = (interval.first + interval.second) / 2; } // get the target point: Math::Vector target = Animation::JointLocalToGlobal(joint_parents_vec, jointIndex, poses[frame]).GetTranslation(); for(int i = interval.first; i < interval.second; ++i) { Math::Vector framePt = Animation::JointLocalToGlobal(joint_parents_vec, jointIndex, poses[i]).GetTranslation(); inContact[i] = 1; points[i] = target; if (!startConstrained && !endConstrained) { points[i].SetY(std::max(framePt.GetY(), minHeight)); // std::cout << " Frame " << i << ": SetY with framePt.GetY()=" << framePt.GetY() // << ", minHeight=" << minHeight << std::endl; } } } } float TargetReachFalloff( const std::vector& joint_parents_vec, const Pose& defaultPose, int32_t jointIndex, Animation::IKType ikType, const Math::Vector& target, const Pose& pose, const Math::Transform& rootTx = Math::Transform::Identity) { float maxReach = defaultPose[jointIndex].GetTranslation().GetLength3(); if (ikType == Animation::IKType::kTwoBone) { jointIndex = joint_parents_vec[jointIndex]; ASSERT(jointIndex > -1); maxReach += defaultPose[jointIndex].GetTranslation().GetLength3(); } // Get base joint world Tx jointIndex = joint_parents_vec[jointIndex]; ASSERT(jointIndex > -1); const auto worldTx = Animation::JointLocalToGlobal(joint_parents_vec, jointIndex, pose, rootTx); // Gaussian falloff float targetDist = target.GetDistance3(worldTx.GetTranslation()); float tmp = Math::Max(targetDist / maxReach - 0.99f, 0.f) / 0.01f; tmp = tmp * tmp; return std::exp(-2.f * tmp * tmp); } void CorrectHipsY( std::vector& poses, const std::vector& targetPoses, const std::vector& fullBodyMask, const std::vector& contacts, float contactThreshold ) { // Correct the y coordinates of the root. auto N = poses.size(); Eigen::MatrixXd x(N, 1); Eigen::MatrixXd observations(N, 1); Eigen::MatrixXd xfixed(N, 1); // Fill in the initial trajectory (x) and the values we want to hit when we // warp it (observations): Eigen::VectorXd yCorrectMargins(N); for(size_t frame = 0; frame < N; ++frame) { yCorrectMargins[frame] = fullBodyMask[frame] ? 
0.0f : -1.0f; x(frame, 0) = ((float*)&poses[frame][0].GetTranslation())[1]; observations(frame, 0) = ((float*)&targetPoses[frame][0].GetTranslation())[1]; } TrajectoryCorrector ycorrector( yCorrectMargins, pos_weight * 10, vel_weight, acc_weight * 0.1f ); ycorrector.Interpolate( xfixed, observations, x ); // fill channel again: for (uint32_t frame = 0; frame < N; ++frame) { ((float*)&poses[frame][0].GetTranslation())[1] = float(xfixed(frame, 0)); } } void SmoothChannels( Eigen::MatrixXd &x, const std::vector& mask ) { for( uint32_t i=0; i < mask.size(); ++i) { uint32_t i_prev = i == 0 ? 0 : i-1; uint32_t i_next = std::min(uint32_t(i+1), uint32_t(mask.size()-1)); if(i > 0 && mask[i] > 0 && mask[i_prev] == 0) { // if the previous frame is unconstrained and the current frame is constrained, // replace the current frame with the average of its neighbors: for(long j=0; j < x.cols(); ++j) { x(i, j) = 0.5f * (x(i_prev, j) + x(i_next, j)); } } if(mask[i] > 0 && mask[i_next] == 0) { // if the next frame is unconstrained and the current frame is constrained, // replace the current frame with the average of its neighbors: for(long j=0; j < x.cols(); ++j) { x(i, j) = 0.5f * (x(i_prev, j) + x(i_next, j)); } } } } void CorrectHipsXZ( std::vector& poses, const std::vector& targetPoses, const std::vector& fullBodyMask, const std::vector& rootMask, const std::vector& endEffectorPins, const Eigen::VectorXd& velocity_weights, float root_margin ) { auto N = poses.size(); Eigen::VectorXd margins(N); for( size_t i = 0; i < N; ++i ) { margins[i] = fullBodyMask[i] ? 0.0f : -1.0f; } std::vector rootCombinedMask(N, 0.0f); for(size_t i = 0; i < N; ++i) { rootCombinedMask[i] = (fullBodyMask[i] > 0) || (rootMask[i] > 0); if(rootMask[i] > 0 && margins[i] != 0) { margins[i] = root_margin; } for (auto& c : endEffectorPins) { if (c.contactMask[i] && margins[i] != 0) { margins[i] = root_margin; } } } TrajectoryCorrector xzcorrector( margins, pos_weight, vel_weight, acc_weight, velocity_weights ); // Enforce pose constraints on root xz trajectory: Eigen::MatrixXd x(N, 2); Eigen::MatrixXd observations(N, 2); Eigen::MatrixXd x_fixed(N, 2); observations.setZero(); for (uint32_t frame = 0; frame < N; ++frame) { x(frame, 0) = ((float*)&poses[frame][0].GetTranslation())[0]; x(frame, 1) = ((float*)&poses[frame][0].GetTranslation())[2]; observations(frame, 0) = ((float*)&targetPoses[frame][0].GetTranslation())[0]; observations(frame, 1) = ((float*)&targetPoses[frame][0].GetTranslation())[2]; } SmoothChannels(x, rootCombinedMask); xzcorrector.Interpolate( x_fixed, observations, x ); // fill channels again: for (uint32_t frame = 0; frame < N; ++frame) { ((float*)&poses[frame][0].GetTranslation())[0] = float(x_fixed(frame, 0)); ((float*)&poses[frame][0].GetTranslation())[2] = float(x_fixed(frame, 1)); } } void CorrectRotationsForBone( std::vector& poses, const std::vector& targetPoses, const std::vector& mask, const TrajectoryCorrector& corrector, int boneIdx, bool performChannelSmoothing) { auto N = poses.size(); Eigen::MatrixXd x(N, 1); Eigen::MatrixXd observations(N, 1); observations.setZero(); Eigen::MatrixXd x_fixed(N, 1); // Quaternion components can flip when they pass through 180 degree // rotations, so let's convert all the quaternions in this channel to // the forward/up vector representation, modify them, then convert back // to quaternions: // convert time series to 6d forward/up: std::vector forwardUp(6 * N); std::vector targetForwardUp(6 * N); for (uint32_t frame = 0; frame < N; ++frame) { auto q = 
poses[frame][boneIdx].GetRotation(); auto forward = q.ZAxis(); auto up = q.YAxis(); forwardUp[N * 0 + frame] = forward.GetX(); forwardUp[N * 1 + frame] = forward.GetY(); forwardUp[N * 2 + frame] = forward.GetZ(); forwardUp[N * 3 + frame] = up.GetX(); forwardUp[N * 4 + frame] = up.GetY(); forwardUp[N * 5 + frame] = up.GetZ(); q = targetPoses[frame][boneIdx].GetRotation(); forward = q.ZAxis(); up = q.YAxis(); targetForwardUp[N * 0 + frame] = forward.GetX(); targetForwardUp[N * 1 + frame] = forward.GetY(); targetForwardUp[N * 2 + frame] = forward.GetZ(); targetForwardUp[N * 3 + frame] = up.GetX(); targetForwardUp[N * 4 + frame] = up.GetY(); targetForwardUp[N * 5 + frame] = up.GetZ(); } // correct trajectories: for (uint32_t dim = 0; dim < 6; ++dim) { for (uint32_t frame = 0; frame < N; ++frame) { x(frame, 0) = forwardUp[N * dim + frame]; observations(frame, 0) = mask[frame] * targetForwardUp[N * dim + frame]; } if (performChannelSmoothing) { SmoothChannels(x, mask); } corrector.Interpolate( x_fixed, observations, x ); // fill channel again: for (uint32_t frame = 0; frame < N; ++frame) { forwardUp[N * dim + frame] = float(x_fixed(frame, 0)); } } for (uint32_t frame = 0; frame < N; ++frame) { Math::Vector forward = { forwardUp[N * 0 + frame] ,forwardUp[N * 1 + frame] ,forwardUp[N * 2 + frame] }; Math::Vector up = { forwardUp[N * 3 + frame] ,forwardUp[N * 4 + frame] ,forwardUp[N * 5 + frame] }; forward.Normalize3(); up.Normalize3(); poses[frame][boneIdx].SetRotation(Math::Quaternion::LookRotation(forward, up)); } } void CorrectJointRotations( std::vector& poses, const std::vector& targetPoses, const std::vector& fullBodyMask, const Eigen::VectorXd& velocity_weights ) { auto N = poses.size(); // Create a trajectory corrector for fixing the full body fullBodyMask positions: Eigen::VectorXd margins(N); for( size_t i = 0; i < N; ++i ) { margins[i] = fullBodyMask[i] ? 
0.0f : -1.0f; } TrajectoryCorrector corrector( margins, pos_weight * 10, vel_weight, acc_weight, velocity_weights ); for (uint32_t boneIdx = 0; boneIdx < poses[0].size(); ++boneIdx) { CorrectRotationsForBone( poses, targetPoses, fullBodyMask, corrector, boneIdx, true ); } } void DoEffectorIK( std::vector& poses, const std::vector& targetPoses, const std::vector& fullBodyMask, const std::vector& endEffectorPins, const std::vector& joint_parents_vec, const std::vector& defaultPose ) { // Apply IK for effector pins auto N = poses.size(); std::map> jointCorrectionMasks; std::vector ikFixedPoses = poses; for (auto& c : endEffectorPins) { auto jointIdx = c.jointIndex; if(jointCorrectionMasks[jointIdx].empty()) { // initialize to the full body constraint mask because we // want to constrain that anyway: jointCorrectionMasks[jointIdx] = fullBodyMask; } // Add a trajectory correction mask for the parent joint: auto parentIdx = joint_parents_vec[jointIdx]; if(jointCorrectionMasks[parentIdx].empty()) { // initialize to the full body constraint mask because we // want to constrain that anyway: jointCorrectionMasks[parentIdx] = fullBodyMask; } // Add a trajectory correction mask for its parent if this is // 2 bone IK: auto parentParentIdx = joint_parents_vec[parentIdx]; if(c.contactType == Animation::kTwoBone) { if(jointCorrectionMasks[parentParentIdx].empty()) { // initialize to the full body constraint mask because we // want to constrain that anyway: jointCorrectionMasks[parentParentIdx] = fullBodyMask; } } for (uint32_t fixFrame = 0; fixFrame < fullBodyMask.size(); ++fixFrame) { if (c.contactMask[fixFrame]) { const auto targetGlobalTransform = Animation::JointLocalToGlobal(joint_parents_vec, jointIdx, targetPoses[fixFrame]); // flag the parent joint as fixed in its correction mask: jointCorrectionMasks[parentIdx][fixFrame] = 1; switch(c.contactType) { case Animation::kOneBone: { IK::OneBoneIk( ikFixedPoses[fixFrame], Math::Transform::Identity, jointIdx, 1.0, targetGlobalTransform.GetTranslation(), joint_parents_vec ); break; } case Animation::kTwoBone: { // flag the parent parent joint as fixed in its correction mask: jointCorrectionMasks[parentParentIdx][fixFrame] = 1; IK::TwoBoneIk( ikFixedPoses[fixFrame], Math::Transform::Identity, jointIdx, 1.0, targetGlobalTransform.GetTranslation(), joint_parents_vec, c.hintOffset ); break; } } // now we need to fix things so the global rotation of the joint // matches the input: jointCorrectionMasks[jointIdx][fixFrame] = 1; auto parentGlobalTransform = Animation::JointLocalToGlobal(joint_parents_vec, parentIdx, ikFixedPoses[fixFrame]); ikFixedPoses[fixFrame][jointIdx].SetRotation( targetGlobalTransform.GetRotation() * parentGlobalTransform.GetRotation().GetConjugate() ); } } } // Applying the effector pin IK introduces popping into the animation, // so let's apply the interpolator to all the joints we modified so as to // line the trajectory up properly again: Eigen::VectorXd margins(N); for( auto &kv : jointCorrectionMasks) { for( size_t i = 0; i < N; ++i ) { margins[i] = kv.second[i] ? 
0.0f : -1.0f; } TrajectoryCorrector corrector(margins, pos_weight * 10, vel_weight, acc_weight); CorrectRotationsForBone( poses, ikFixedPoses, kv.second, corrector, kv.first, false ); } } void DoContactIK( std::vector& poses, const std::vector& fullBodyMask, const std::vector& contacts, const std::vector& endEffectorPins, const std::vector& joint_parents_vec, const std::vector& defaultPose, float contactThreshold, bool has_double_ankle_joints ) { auto N = poses.size(); Eigen::VectorXd margins = Eigen::VectorXd::Zero(N); // Apply IK to stabilize limbs on contacts std::map> jointCorrectionMasks; std::vector ikFixedPoses = poses; // Save original poses before any modifications (for double ankle correction later) const std::vector originalPoses = poses; // Track which frames were corrected for each 2-bone contact (for double ankle correction later) std::map> twoBoneContactFrames; auto addEndEffectorMask = [&](uint32_t jointIdx, uint32_t parentIdx, std::vector& jointMask) { auto it = std::find_if( endEffectorPins.begin(), endEffectorPins.end(), [&](const auto &c) { if(jointIdx == c.jointIndex) { return true; } return false; } ); if(it == endEffectorPins.end()) { // We could be correcting the toe joint, in which case we need to use // the parent joint instead: it = std::find_if( endEffectorPins.begin(), endEffectorPins.end(), [&](const auto &c) { if(parentIdx == c.jointIndex) { return true; } return false; } ); } if(it != endEffectorPins.end()) { const auto &msk = it->contactMask; for(size_t i=0; i < msk.size(); ++i) { if(msk[i]) { jointMask[i] = 1.0f; } } } }; // Process two bone contacts first: for (auto& c : contacts) { if(c.contactType != Animation::kTwoBone) { continue; } const auto jointIdx = c.jointIndex; auto parentIdx = joint_parents_vec[jointIdx]; auto parentParentIdx = joint_parents_vec[parentIdx]; auto jointMask = fullBodyMask; addEndEffectorMask(jointIdx, parentIdx, jointMask); // We'll actually be modifying 3 joints here: // * The two joints immediately up in the hierarchy because of the 2 bone IK // * The joint itself because we restore its original global rotation if(jointCorrectionMasks[parentIdx].empty()) { jointCorrectionMasks[parentIdx] = jointMask; } if(jointCorrectionMasks[parentParentIdx].empty()) { jointCorrectionMasks[parentParentIdx] = jointMask; } if(jointCorrectionMasks[jointIdx].empty()) { jointCorrectionMasks[jointIdx] = jointMask; } // Compute the intervals in which the joint is in contact with the floor: auto contactIntervals = ComputeContactIntervals(c.contactMask, jointMask, contactThreshold); FilterContactIntervals(contactIntervals, jointMask); std::vector contactPoints; std::vector inContact; FindContactPoints( contactPoints, inContact, joint_parents_vec, jointIdx, poses, contactIntervals, jointMask, c.contactMask.size(), c.minHeight ); for (uint32_t fixFrame = 0; fixFrame < fullBodyMask.size(); ++fixFrame) { if (inContact[fixFrame]) { auto target = contactPoints[fixFrame]; jointCorrectionMasks[parentIdx][fixFrame] = 1.0f; jointCorrectionMasks[parentParentIdx][fixFrame] = 1.0f; jointCorrectionMasks[jointIdx][fixFrame] = 1.0f; // Track this frame for double ankle correction later if (has_double_ankle_joints) { if (twoBoneContactFrames[jointIdx].empty()) twoBoneContactFrames[jointIdx].resize(fullBodyMask.size(), false); twoBoneContactFrames[jointIdx][fixFrame] = true; } // save the original global rotation of the joint: auto jointGlobalRotation = Animation::JointLocalToGlobal( joint_parents_vec, jointIdx, ikFixedPoses[fixFrame] ).GetRotation(); const float w = 
TargetReachFalloff(
                    joint_parents_vec,
                    defaultPose,
                    jointIdx,
                    c.contactType,
                    target,
                    ikFixedPoses[fixFrame]);

                // apply the 2 bone IK, blending the result back toward the
                // original rotations by the reach falloff weight w:
                auto origParentRotation = ikFixedPoses[fixFrame][parentIdx].GetRotation();
                auto origParentParentRotation = ikFixedPoses[fixFrame][parentParentIdx].GetRotation();
                IK::TwoBoneIk(
                    ikFixedPoses[fixFrame],
                    Math::Transform::Identity,
                    jointIdx,
                    1.0f,
                    target,
                    joint_parents_vec,
                    c.hintOffset);
                ikFixedPoses[fixFrame][parentIdx].SetRotation(
                    Math::Quaternion::SLerp(origParentRotation, ikFixedPoses[fixFrame][parentIdx].GetRotation(), w));
                ikFixedPoses[fixFrame][parentParentIdx].SetRotation(
                    Math::Quaternion::SLerp(origParentParentRotation, ikFixedPoses[fixFrame][parentParentIdx].GetRotation(), w));

                // restore the previous global rotation of this joint:
                auto parentGlobalRotation = Animation::JointLocalToGlobal(
                    joint_parents_vec,
                    parentIdx,
                    ikFixedPoses[fixFrame]).GetRotation();
                jointCorrectionMasks[jointIdx][fixFrame] = 1.0f;
                ikFixedPoses[fixFrame][jointIdx].SetRotation(
                    jointGlobalRotation * parentGlobalRotation.GetConjugate());
            }
        }
    }

    for (auto& kv : jointCorrectionMasks)
    {
        for (size_t i = 0; i < N; ++i)
        {
            margins[i] = kv.second[i] ? 0.0f : -1.0f;
        }
        TrajectoryCorrector corrector(margins, pos_weight * 10, vel_weight, acc_weight);
        CorrectRotationsForBone(poses, ikFixedPoses, kv.second, corrector, kv.first, false);
    }
    jointCorrectionMasks.clear();

    // Then process one bone contacts:
    for (auto& c : contacts)
    {
        if (c.contactType != Animation::kOneBone)
        {
            continue;
        }
        const auto jointIdx = c.jointIndex;
        auto parentIdx = joint_parents_vec[jointIdx];
        // We can't touch frames that have been constrained with full body constraints
        // or the end effector constraints for this joint, so let's combine fullBodyMask
        // with the end effector mask for this joint if it exists so we can use that
        // information later:
        auto jointMask = fullBodyMask;
        addEndEffectorMask(jointIdx, parentIdx, jointMask);
        // Add a trajectory correction mask for the parent joint:
        if (jointCorrectionMasks[parentIdx].empty())
        {
            jointCorrectionMasks[parentIdx] = jointMask;
        }
        // Compute the intervals in which the joint is in contact with the floor:
        auto contactIntervals = ComputeContactIntervals(c.contactMask, jointMask, contactThreshold);
        FilterContactIntervals(contactIntervals, jointMask, true);
        for (const auto& interval : contactIntervals)
        {
            for (int fixFrame = interval.first; fixFrame < interval.second; ++fixFrame)
            {
                // All we're going to do here is stick the joint to the floor -
                // we're going to allow it to slide from side to side.
// Find a target position that lies on the floor by iteratively // projecting the joint to the floor (pure laziness really, this could // be done analytically): Math::Vector parentPos = Animation::JointLocalToGlobal(joint_parents_vec, parentIdx, ikFixedPoses[fixFrame]).GetTranslation(); Math::Vector target = Animation::JointLocalToGlobal(joint_parents_vec, jointIdx, ikFixedPoses[fixFrame]).GetTranslation(); float jointLength = (target - parentPos).GetLength3(); for(int32_t i = 0; i < 10; ++i) { target.SetY(c.minHeight); auto dir = (target - parentPos).GetNormalized3(); target = parentPos + dir * jointLength; } IK::OneBoneIk( ikFixedPoses[fixFrame], Math::Transform::Identity, jointIdx, 1.0f, target, joint_parents_vec ); jointCorrectionMasks[parentIdx][fixFrame] = 1.0f; } } } // Fixing the contacts with IK will introduce popping into the animation, // so let's apply the interpolator to all the joints we modified so as to // line the trajectory up properly again: for( auto &kv : jointCorrectionMasks) { for( size_t i = 0; i < N; ++i ) { margins[i] = kv.second[i] ? 0.0f : -1.0f; } TrajectoryCorrector corrector(margins, pos_weight * 10, vel_weight, acc_weight); CorrectRotationsForBone( poses, ikFixedPoses, kv.second, corrector, kv.first, false ); } if (has_double_ankle_joints) { // Maps to save target positions BEFORE 2-bone IK modifies them std::map> savedFirstAnkleTargets; // [firstAnkleIdx][frame] -> position std::map> savedToeTargets; // [firstAnkleIdx][frame] -> position std::map contactToToeIdx; // firstAnkleIdx -> toeIdx // Find toe joints for each leg for (const auto& tc : contacts) { if (tc.contactType == Animation::kOneBone) { // The parent of the toe is the 1st ankle int parentIdx = joint_parents_vec[tc.jointIndex]; if (parentIdx >= 0) { contactToToeIdx[parentIdx] = tc.jointIndex; } } } // For each 2-bone contact, correct the parent (2nd ankle) joint for (auto& c : contacts) { if (c.contactType != Animation::kTwoBone) continue; const auto firstAnkleIdx = c.jointIndex; const auto secondAnkleIdx = joint_parents_vec[firstAnkleIdx]; const auto kneeIdx = joint_parents_vec[secondAnkleIdx]; const auto hipIdx = joint_parents_vec[kneeIdx]; if (hipIdx < 0) continue; // safety check // Get saved contact frames for this ankle auto it = twoBoneContactFrames.find(firstAnkleIdx); if (it == twoBoneContactFrames.end()) continue; const auto& contactFrames = it->second; // Add correction mask for knee and hip auto jointMask = fullBodyMask; addEndEffectorMask(firstAnkleIdx, secondAnkleIdx, jointMask); if (jointCorrectionMasks[kneeIdx].empty()) jointCorrectionMasks[kneeIdx] = jointMask; if (jointCorrectionMasks[hipIdx].empty()) jointCorrectionMasks[hipIdx] = jointMask; for (uint32_t fixFrame = 0; fixFrame < fullBodyMask.size(); ++fixFrame) { // Only correct frames where the 1st ankle was corrected if (!contactFrames[fixFrame]) continue; // *** SAVE TARGET POSITIONS BEFORE 2-BONE IK *** savedFirstAnkleTargets[firstAnkleIdx][fixFrame] = Animation::JointLocalToGlobal( joint_parents_vec, firstAnkleIdx, ikFixedPoses[fixFrame]).GetTranslation(); if (contactToToeIdx.count(firstAnkleIdx)) { savedToeTargets[firstAnkleIdx][fixFrame] = Animation::JointLocalToGlobal( joint_parents_vec, contactToToeIdx[firstAnkleIdx], ikFixedPoses[fixFrame]).GetTranslation(); } // Get original global transforms (before any IK corrections) auto originalFirstAnkleGlobal = Animation::JointLocalToGlobal( joint_parents_vec, firstAnkleIdx, originalPoses[fixFrame]); auto originalSecondAnkleGlobal = Animation::JointLocalToGlobal( 
joint_parents_vec, secondAnkleIdx, originalPoses[fixFrame]); // Compute delta from 1st ankle to 2nd ankle in original animation auto deltaFirstToSecond = originalFirstAnkleGlobal.GetDeltaToOther(originalSecondAnkleGlobal); // Get corrected 1st ankle global transform auto correctedFirstAnkleGlobal = Animation::JointLocalToGlobal( joint_parents_vec, firstAnkleIdx, ikFixedPoses[fixFrame]); // Apply the original delta to the corrected 1st ankle to get target for 2nd ankle auto target = (deltaFirstToSecond * correctedFirstAnkleGlobal).GetTranslation(); // print current and target second ankle positions auto currPos = Animation::JointLocalToGlobal( joint_parents_vec, secondAnkleIdx, ikFixedPoses[fixFrame]).GetTranslation(); // Apply 2-bone IK: Hip -> Knee -> 2nd Ankle IK::TwoBoneIk( ikFixedPoses[fixFrame], Math::Transform::Identity, secondAnkleIdx, 1.0f, target, joint_parents_vec, c.hintOffset ); // auto correctedPos = Animation::JointLocalToGlobal( // joint_parents_vec, secondAnkleIdx, ikFixedPoses[fixFrame]).GetTranslation(); // std::cout << "Frame " << fixFrame << ": target second ankle=(" << target.GetX() << ", " << target.GetY() << ", " << target.GetZ() << "), corrected second ankle position=(" << correctedPos.GetX() << ", " << correctedPos.GetY() << ", " << correctedPos.GetZ() << ")" << std::endl; jointCorrectionMasks[kneeIdx][fixFrame] = 1.0f; jointCorrectionMasks[hipIdx][fixFrame] = 1.0f; } } // Smooth the corrected joints for (auto& kv : jointCorrectionMasks) { for (size_t i = 0; i < N; ++i) margins[i] = kv.second[i] ? 0.0f : -1.0f; TrajectoryCorrector corrector(margins, pos_weight * 10, vel_weight, acc_weight); CorrectRotationsForBone(poses, ikFixedPoses, kv.second, corrector, kv.first, false); } // *** PHASE 2: 1-bone IKs to restore 1st ankle and toe *** jointCorrectionMasks.clear(); for (auto& c : contacts) { if (c.contactType != Animation::kTwoBone) continue; const auto firstAnkleIdx = c.jointIndex; const auto secondAnkleIdx = joint_parents_vec[firstAnkleIdx]; auto it = twoBoneContactFrames.find(firstAnkleIdx); if (it == twoBoneContactFrames.end()) continue; // Setup correction masks auto jointMask = fullBodyMask; addEndEffectorMask(firstAnkleIdx, secondAnkleIdx, jointMask); if (jointCorrectionMasks[secondAnkleIdx].empty()) jointCorrectionMasks[secondAnkleIdx] = jointMask; if (jointCorrectionMasks[firstAnkleIdx].empty()) jointCorrectionMasks[firstAnkleIdx] = jointMask; for (uint32_t fixFrame = 0; fixFrame < fullBodyMask.size(); ++fixFrame) { if (!it->second[fixFrame]) continue; // 1-bone IK: Rotate 2nd ankle so 1st ankle reaches saved target IK::OneBoneIk( ikFixedPoses[fixFrame], Math::Transform::Identity, firstAnkleIdx, 1.0f, savedFirstAnkleTargets[firstAnkleIdx][fixFrame], joint_parents_vec ); jointCorrectionMasks[secondAnkleIdx][fixFrame] = 1.0f; // auto target = savedFirstAnkleTargets[firstAnkleIdx][fixFrame]; // auto corrected = Animation::JointLocalToGlobal( // joint_parents_vec, firstAnkleIdx, ikFixedPoses[fixFrame]).GetTranslation(); // std::cout << "Frame " << fixFrame << ": target first ankle=(" << target.GetX() << ", " << target.GetY() << ", " << target.GetZ() << "), corrected first ankle=(" << corrected.GetX() << ", " << corrected.GetY() << ", " << corrected.GetZ() << ")" << std::endl; // 1-bone IK: Rotate 1st ankle so toe reaches saved target if (contactToToeIdx.count(firstAnkleIdx) && savedToeTargets[firstAnkleIdx].count(fixFrame)) { IK::OneBoneIk( ikFixedPoses[fixFrame], Math::Transform::Identity, contactToToeIdx[firstAnkleIdx], 1.0f, 
savedToeTargets[firstAnkleIdx][fixFrame], joint_parents_vec ); jointCorrectionMasks[firstAnkleIdx][fixFrame] = 1.0f; } // target = savedToeTargets[firstAnkleIdx][fixFrame]; // corrected = Animation::JointLocalToGlobal( // joint_parents_vec, contactToToeIdx[firstAnkleIdx], ikFixedPoses[fixFrame]).GetTranslation(); // std::cout << "Frame " << fixFrame << ": target toe=(" << target.GetX() << ", " << target.GetY() << ", " << target.GetZ() << "), corrected toe=(" << corrected.GetX() << ", " << corrected.GetY() << ", " << corrected.GetZ() << ")" << std::endl; } } // Smooth 2nd ankle and 1st ankle for (auto& kv : jointCorrectionMasks) { for (size_t i = 0; i < N; ++i) margins[i] = kv.second[i] ? 0.0f : -1.0f; TrajectoryCorrector corrector(margins, pos_weight * 10, vel_weight, acc_weight); CorrectRotationsForBone(poses, ikFixedPoses, kv.second, corrector, kv.first, false); } } } } Math::Transform Animation::JointLocalToGlobal( const std::vector& joint_parents_vec, int32_t index, const Pose& localPose, const Math::Transform& rootTx) { Math::Transform worldTx = Math::Transform::Identity; while (index > -1) { worldTx = worldTx * localPose[index]; index = joint_parents_vec[index]; } return worldTx * rootTx; } void Animation::CorrectMotion( std::vector& poses, const std::vector& targetPoses, const std::vector& fullBodyMask, const std::vector& rootMask, const std::vector& contacts, const std::vector& endEffectorPins, const std::vector& joint_parents_vec, const std::vector& defaultPose, float contactThreshold, float root_margin, bool has_double_ankle_joints ) { // Calculate some weights so we can preserve velocities more strongly on frames where // the root velocity is low const uint32_t N = poses.size(); Eigen::VectorXd velocity_weights(N); for (uint32_t frame = 1; frame < N; ++frame) { // work out xz velocity for this frame: float xdiff = poses[frame][0].GetTranslation()[0] - poses[frame - 1][0].GetTranslation()[0]; float zdiff = poses[frame][0].GetTranslation()[2] - poses[frame - 1][0].GetTranslation()[2]; // find velocity magnitude, divided by a typical walking speed: float v_mag = sqrtf(xdiff*xdiff + zdiff*zdiff) / 0.05f; // weight lower velocities higher so that the corrector doesn't make the character drift around // when it's supposed to stand still: v_mag = std::max(v_mag, 1.0f/1000.0f); velocity_weights(frame) = 1.0f / v_mag; } velocity_weights[0] = velocity_weights[1]; // Correct root y coordinates. // This will warp the root y coordinates in "poses" so they match the root y coordinates // in "targetPoses", on frames where the root y coordinates are constrained, ie the frames // where fullBodyMask = 1. // In addition to this, it preserves the root y coordinates in "pose" on frames where foot // contacts are active, to avoid mushiness when characters are jumping. CorrectHipsY( poses, targetPoses, fullBodyMask, contacts, contactThreshold ); // Correct root xz coordinates: // This will warp the root xz coordinates in "poses" so they match the xz coordinates // in "targetPoses" on frames where fullBodyMask = 1, and warp them so they're within // "root_margin" units of targetPoses on frames where rootMask = 1. 
CorrectHipsXZ(
        poses,
        targetPoses,
        fullBodyMask,
        rootMask,
        endEffectorPins,
        velocity_weights,
        root_margin
    );

    // Correct joint rotations by warping the rotations so they match targetPoses on frames
    // where fullBodyMask = 1:
    CorrectJointRotations(
        poses,
        targetPoses,
        fullBodyMask,
        velocity_weights
    );

    // Apply IK for end effector pins
    DoEffectorIK(
        poses,
        targetPoses,
        fullBodyMask,
        endEffectorPins,
        joint_parents_vec,
        defaultPose
    );

    // Apply IK to stabilize limbs on contacts
    DoContactIK(
        poses,
        fullBodyMask,
        contacts,
        endEffectorPins,
        joint_parents_vec,
        defaultPose,
        contactThreshold,
        has_double_ankle_joints
    );
}

================================================
FILE: MotionCorrection/src/cpp/AnimProcessing/Utility.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

#include "Math/Transform.h"
#include <cstdint>
#include <vector>

namespace Animation
{
    enum IKType
    {
        kOneBone,
        kTwoBone
    };

    Math::Transform JointLocalToGlobal(
        const std::vector<int32_t>& joint_parents_vec,
        int32_t index,
        const std::vector<Math::Transform>& localPose,
        const Math::Transform& rootTx = Math::Transform::Identity
    );

    struct ContactInfo
    {
        // index of the IK contact joint:
        int jointIndex;
        // mask indicating which frames are in contact:
        std::vector<float> contactMask;
        // contact type:
        IKType contactType = kTwoBone;
        // Extra info for TwoBoneIK
        Math::Vector hintOffset = Math::Vector::Zero;
        float minHeight = 0.0f;
    };

    void CorrectMotion(
        std::vector<std::vector<Math::Transform>>& poses,
        const std::vector<std::vector<Math::Transform>>& targetPoses,
        const std::vector<float>& mask,
        const std::vector<float>& rootMask,
        const std::vector<ContactInfo>& contacts,
        const std::vector<ContactInfo>& endEffectorPins,
        const std::vector<int32_t>& joint_parents_vec,
        const std::vector<Math::Transform>& defaultPose,
        float contactThreshold,
        float root_margin,
        bool has_double_ankle_joints
    );
}
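// ------------------------------------------------------------------
// Illustrative usage sketch (not compiled; not part of the library).
// The masks and empty contact lists below are hypothetical placeholders;
// CorrectMotion edits "poses" in place so it honors the constraints while
// staying close to the original motion:
#if 0
#include "Utility.h"

void ExampleCorrect(
    std::vector<std::vector<Math::Transform>>& poses,         // generated motion
    const std::vector<std::vector<Math::Transform>>& targets, // constraint poses
    const std::vector<int32_t>& parents,
    const std::vector<Math::Transform>& restPose)
{
    const size_t numFrames = poses.size();
    std::vector<float> fullBodyMask(numFrames, 0.0f); // 1 on fully keyframed frames
    std::vector<float> rootMask(numFrames, 0.0f);     // 1 where only the root is pinned
    std::vector<Animation::ContactInfo> contacts;     // e.g. feet, from a contact model
    std::vector<Animation::ContactInfo> pins;         // end effector pins
    Animation::CorrectMotion(poses, targets, fullBodyMask, rootMask,
                             contacts, pins, parents, restPose,
                             /*contactThreshold=*/0.5f, /*root_margin=*/0.05f,
                             /*has_double_ankle_joints=*/false);
}
#endif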
================================================
FILE: MotionCorrection/src/cpp/BindingsPython.cpp
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#include "AnimProcessing/Utility.h"

#ifdef _WIN32
#pragma warning(push)
#pragma warning(disable : 4623 4191 4686 4868 5219 4191 4355)
#endif
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <cmath>
#ifdef _WIN32
#pragma warning(pop)
#endif

#include <algorithm>

namespace py = pybind11;

float strip_nan_inf(float x) noexcept
{
    if (std::isnan(x)) return 0;
    if (std::isinf(x)) return 0;
    return x;
}

void correct_motion(
    py::array_t<float>& rootTranslations,
    py::array_t<float>& jointRotations,
    const py::array_t<float>& rootTranslationsTarget,
    const py::array_t<float>& jointRotationsTarget,
    const py::array_t<float>& fullPoseMask,
    const py::array_t<float>& leftHandMask,
    const py::array_t<float>& rightHandMask,
    const py::array_t<float>& leftFootMask,
    const py::array_t<float>& rightFootMask,
    const py::array_t<float>& rootMask,
    const py::array_t<float>& contacts,
    const py::list& joint_parents,
    const py::list& joint_ref_translations,
    const py::list& joint_ref_rotations,
    int left_hand_idx,
    int right_hand_idx,
    int left_foot_idx,
    int right_foot_idx,
    float contact_threshold,
    float root_margin,
    bool has_double_ankle_joints
)
{
    if (joint_parents.size() != joint_ref_translations.size())
    {
        throw std::runtime_error("correct_motion python bindings: joint_parents and joint_ref_translations must have the same size");
    }
    if (joint_parents.size() != joint_ref_rotations.size())
    {
        throw std::runtime_error("correct_motion python bindings: joint_parents and joint_ref_rotations must have the same size");
    }
    if (left_hand_idx < 0 || right_hand_idx < 0 || left_foot_idx < 0 || right_foot_idx < 0)
    {
        throw std::runtime_error("correct_motion python bindings: left_hand_idx, right_hand_idx, left_foot_idx, and right_foot_idx must be non-negative");
    }
    if (left_hand_idx >= (int)joint_parents.size() || right_hand_idx >= (int)joint_parents.size() ||
        left_foot_idx >= (int)joint_parents.size() || right_foot_idx >= (int)joint_parents.size())
    {
        throw std::runtime_error("correct_motion python bindings: left_hand_idx, right_hand_idx, left_foot_idx, and right_foot_idx must be less than the number of joints");
    }

    std::vector<Math::Transform> defaultPose(joint_parents.size());
    for (size_t i = 0; i < joint_ref_translations.size(); ++i)
    {
        if (!py::isinstance<py::list>(joint_ref_translations[i]))
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_ref_translations to be a list of lists");
        }
        py::list inner_list = joint_ref_translations[i].cast<py::list>();
        if (inner_list.size() != 3)
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_ref_translations to be a list of lists, length 3");
        }
        if (!py::isinstance<py::float_>(inner_list[0]) ||
            !py::isinstance<py::float_>(inner_list[1]) ||
            !py::isinstance<py::float_>(inner_list[2]))
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_ref_translations to be a list of lists, length 3, float values");
        }
        if (!py::isinstance<py::list>(joint_ref_rotations[i]))
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_ref_rotations to be a list of lists");
        }
        py::list inner_list_rot = joint_ref_rotations[i].cast<py::list>();
        if (inner_list_rot.size() != 4)
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_ref_rotations to be a list of lists, length 4");
        }
        if (!py::isinstance<py::float_>(inner_list_rot[0]) ||
            !py::isinstance<py::float_>(inner_list_rot[1]) ||
            !py::isinstance<py::float_>(inner_list_rot[2]) ||
            !py::isinstance<py::float_>(inner_list_rot[3]))
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_ref_rotations to be a list of lists, length 4, float values");
        }
        defaultPose[i].SetTranslation(Math::Vector(
            inner_list[0].cast<float>(),
            inner_list[1].cast<float>(),
            inner_list[2].cast<float>()));
        defaultPose[i].SetRotation(Math::Quaternion(
            inner_list_rot[0].cast<float>(),
            inner_list_rot[1].cast<float>(),
            inner_list_rot[2].cast<float>(),
            inner_list_rot[3].cast<float>()));
    }
    std::vector<int32_t> joint_parents_vec(joint_parents.size());
    for (size_t i = 0; i < joint_parents.size(); ++i)
    {
        if (!py::isinstance<py::int_>(joint_parents[i]))
        {
            throw std::runtime_error("correct_motion python bindings: Expected joint_parents to be a list of ints");
        }
        joint_parents_vec[i] = joint_parents[i].cast<int32_t>();
        if (joint_parents_vec[i] >= (int)joint_parents.size())
        {
            throw std::runtime_error("correct_motion python bindings: joint_parents must be a list of ints, and all values must be less than the number of joints");
        }
    }

    size_t num_joints = defaultPose.size();
    size_t gen_length = fullPoseMask.size();
    if (leftHandMask.size() != (int)gen_length || rightHandMask.size() != (int)gen_length ||
        leftFootMask.size() != (int)gen_length || rightFootMask.size() != (int)gen_length ||
        rootMask.size() != (int)gen_length)
    {
        throw std::runtime_error("correct_motion python bindings: all masks must have the same size");
    }
    if (rootTranslations.size() != 3 * (int)gen_length)
    {
        throw std::runtime_error("correct_motion python bindings: rootTranslations has the wrong size");
    }
    if (jointRotations.size() != 4 * (int)num_joints * (int)gen_length)
    {
        throw std::runtime_error("correct_motion python bindings: jointRotations has the wrong size");
    }
    if (rootTranslationsTarget.size() != 3 * (int)gen_length)
    {
        throw std::runtime_error("correct_motion python bindings: rootTranslationsTarget has the wrong size");
    }
    if (jointRotationsTarget.size() != 4 * (int)num_joints * (int)gen_length)
    {
        throw std::runtime_error("correct_motion python bindings: jointRotationsTarget has the wrong size");
    }

    std::vector<Animation::ContactInfo> endEffectorPins(4);
    endEffectorPins[0].jointIndex = left_hand_idx;
    endEffectorPins[0].hintOffset = Math::Vector(0.0f, 0.0f, -0.1f);
    endEffectorPins[1].jointIndex = right_hand_idx;
    endEffectorPins[1].hintOffset = Math::Vector(0.0f, 0.0f, -0.1f);
    endEffectorPins[2].jointIndex = left_foot_idx;
    endEffectorPins[2].hintOffset = Math::Vector(0.0f, 0.0f, 0.1f);
    endEffectorPins[3].jointIndex = right_foot_idx;
    endEffectorPins[3].hintOffset = Math::Vector(0.0f, 0.0f, 0.1f);
    endEffectorPins[0].contactMask.reserve(gen_length);
    endEffectorPins[1].contactMask.reserve(gen_length);
    endEffectorPins[2].contactMask.reserve(gen_length);
    endEffectorPins[3].contactMask.reserve(gen_length);
    for (size_t i = 0; i < gen_length; ++i)
    {
        endEffectorPins[0].contactMask.push_back((1.0f - fullPoseMask.at(i)) * leftHandMask.at(i));
        endEffectorPins[1].contactMask.push_back((1.0f - fullPoseMask.at(i)) * rightHandMask.at(i));
        endEffectorPins[2].contactMask.push_back((1.0f - fullPoseMask.at(i)) * leftFootMask.at(i));
        endEffectorPins[3].contactMask.push_back((1.0f - fullPoseMask.at(i)) * rightFootMask.at(i));
    }
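    // Worked example (comments only): the pin masks above are gated by the
    // full pose mask, e.g. with fullPoseMask = [1, 0, 0] and
    // leftHandMask = [1, 1, 0], the left hand pin mask becomes
    // (1 - fullPoseMask) * leftHandMask = [0, 1, 0]: a frame that is already
    // fully keyframed doesn't also get an end effector pin.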
    std::vector<Animation::ContactInfo> contactInfo(2);
    auto footTranslation = Animation::JointLocalToGlobal(joint_parents_vec, right_foot_idx, defaultPose).GetTranslation();
    contactInfo[0].jointIndex = right_foot_idx;
    contactInfo[0].hintOffset = Math::Vector(0.0f, 0.0f, 0.1f);
    contactInfo[0].minHeight = footTranslation.GetY();
    footTranslation = Animation::JointLocalToGlobal(joint_parents_vec, left_foot_idx, defaultPose).GetTranslation();
    contactInfo[1].jointIndex = left_foot_idx;
    contactInfo[1].hintOffset = Math::Vector(0.0f, 0.0f, 0.1f);
    contactInfo[1].minHeight = footTranslation.GetY();
    auto& rContacts = contactInfo[0].contactMask;
    auto& lContacts = contactInfo[1].contactMask;
    rContacts.resize(fullPoseMask.size());
    lContacts.resize(fullPoseMask.size());
    for (int i = 0; i < fullPoseMask.size(); ++i)
    {
        // don't flag it as a contact if it's been masked:
        rContacts[i] = rightFootMask.at(i) ? 0.0f : contacts.at(4 * i + 2);
        lContacts[i] = leftFootMask.at(i) ? 0.0f : contacts.at(4 * i + 0);
        // Flag the heel as a contact if the toe is a contact:
        rContacts[i] = std::min((rightFootMask.at(i) ? 0.0f : contacts.at(4 * i + 3)) + rContacts[i], 1.0f);
        lContacts[i] = std::min((leftFootMask.at(i) ? 0.0f : contacts.at(4 * i + 1)) + lContacts[i], 1.0f);
    }

    int left_toe_idx = -1;
    int right_toe_idx = -1;
    for (int i = 0; i < (int)num_joints; ++i)
    {
        if (joint_parents_vec[i] == left_foot_idx) { left_toe_idx = i; }
        if (joint_parents_vec[i] == right_foot_idx) { right_toe_idx = i; }
    }
    if (left_toe_idx != -1 && right_toe_idx != -1)
    {
        auto toeTranslation = Animation::JointLocalToGlobal(joint_parents_vec, right_toe_idx, defaultPose).GetTranslation();
        contactInfo.resize(4);
        contactInfo[2].jointIndex = right_toe_idx;
        contactInfo[2].contactType = Animation::kOneBone;
        contactInfo[2].minHeight = toeTranslation.GetY();
        contactInfo[3].jointIndex = left_toe_idx;
        contactInfo[3].contactType = Animation::kOneBone;
        contactInfo[3].minHeight = toeTranslation.GetY();
        auto& rToeContacts = contactInfo[2].contactMask;
        auto& lToeContacts = contactInfo[3].contactMask;
        // fill up the toe contacts:
        rToeContacts.resize(fullPoseMask.size());
        lToeContacts.resize(fullPoseMask.size());
        for (int i = 0; i < fullPoseMask.size(); ++i)
        {
            // don't flag it as a contact if it's been masked:
            rToeContacts[i] = rightFootMask.at(i) ? 0.0f : contacts.at(4 * i + 3);
            lToeContacts[i] = leftFootMask.at(i) ? 0.0f : contacts.at(4 * i + 1);
        }
    }

    auto setTransforms = [gen_length, num_joints](
        std::vector<std::vector<Math::Transform>>& poses,
        const py::array_t<float>& rootTranslations,
        const py::array_t<float>& jointRotations
    )
    {
        for (size_t f = 0; f < gen_length; ++f)
        {
            poses[f][0].SetTranslation({
                strip_nan_inf(rootTranslations.at(3 * f + 0)),
                strip_nan_inf(rootTranslations.at(3 * f + 1)),
                strip_nan_inf(rootTranslations.at(3 * f + 2)) });
        }
        for (size_t f = 0; f < gen_length; ++f)
        {
            for (size_t j = 0; j < num_joints; ++j)
            {
                // incoming layout is w x y z; the constructor takes x y z w:
                Math::Quaternion q(
                    strip_nan_inf(jointRotations.at(4 * (num_joints * f + j) + 1)),
                    strip_nan_inf(jointRotations.at(4 * (num_joints * f + j) + 2)),
                    strip_nan_inf(jointRotations.at(4 * (num_joints * f + j) + 3)),
                    strip_nan_inf(jointRotations.at(4 * (num_joints * f + j) + 0)));
                q.Normalize();
                poses[f][j].SetRotation(q);
            }
        }
    };

    std::vector<std::vector<Math::Transform>> posesFixed(gen_length, defaultPose);
    setTransforms(posesFixed, rootTranslations, jointRotations);
    std::vector<std::vector<Math::Transform>> posesTarget(gen_length, defaultPose);
    setTransforms(posesTarget, rootTranslationsTarget, jointRotationsTarget);

    std::vector<float> fullPoseMask_vec;
    std::vector<float> rootMask_vec;
    for (size_t f = 0; f < gen_length; ++f)
    {
        fullPoseMask_vec.push_back(fullPoseMask.at(f));
        rootMask_vec.push_back(rootMask.at(f));
    }

    Animation::CorrectMotion(
        posesFixed,
        posesTarget,
        fullPoseMask_vec,
        rootMask_vec,
        contactInfo,
        endEffectorPins,
        joint_parents_vec,
        defaultPose,
        contact_threshold,
        root_margin,
        has_double_ankle_joints
    );
================================================
FILE: MotionCorrection/src/cpp/Compiler.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

// Compiler specific defines
// Finds the compiler type and version.
#if defined(__clang__)
# define COMPILER_CLANG
#elif defined(__GNUC__) // Check after Clang, as Clang defines this too
# define COMPILER_GNUC
#elif defined(_MSC_VER) // Check after Clang, since we could be building with either within VS
# define COMPILER_MSVC
#else
# error "Unknown compiler."
#endif

#if defined(COMPILER_MSVC)
#define FORCE_INLINE __forceinline
#elif defined(COMPILER_GNUC) || defined(COMPILER_CLANG)
#define FORCE_INLINE inline __attribute__((always_inline))
#endif

================================================
FILE: MotionCorrection/src/cpp/Debug.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

#include "Platform.h"

#define ASSERT( cond ) do { if( !(cond) ) { DEBUG_BREAK(); } } while( 0 )
#define HALT() { DEBUG_BREAK(); }
#define UNIMPLEMENTED_FUNCTION() { DEBUG_BREAK(); }
#define UNREACHABLE_CODE() { DEBUG_BREAK(); }

================================================
FILE: MotionCorrection/src/cpp/Math/Constants.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

#include <limits>

// Mathematical constants
namespace Math
{
    static constexpr float const Epsilon = 1.0e-06f;
    static constexpr float const LargeEpsilon = 1.0e-04f;
    static constexpr float const HugeEpsilon = 1.0e-02f;
    static constexpr float const Pi = 3.141592654f;
    static constexpr float const TwoPi = 6.283185307f;
    static constexpr float const OneDivPi = 0.318309886f;
    static constexpr float const OneDivTwoPi = 0.159154943f;
    static constexpr float const PiDivTwo = 1.570796327f;
    static constexpr float const PiDivFour = 0.785398163f;
    static constexpr float const SqrtTwo = 1.4142135623730950488016887242097f;
    static constexpr float const OneDivSqrtTwo = 1.0f / SqrtTwo;
    static constexpr float const DegreesToRadians = 0.0174532925f;
    static constexpr float const RadiansToDegrees = 57.2957795f;
    static constexpr float const Infinity = std::numeric_limits<float>::infinity();
    static constexpr float const QNaN = std::numeric_limits<float>::quiet_NaN();
}
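For orientation, a minimal sketch of how these three headers compose: FORCE_INLINE comes from Compiler.h, ASSERT from Debug.h, and the conversion constant from Constants.h. The helper below is hypothetical, not repository code:

```cpp
#include "Compiler.h"
#include "Debug.h"
#include "Math/Constants.h"

// Hypothetical helper, for illustration only: converts degrees to radians
// with the constant from Constants.h, guarding the input with ASSERT.
FORCE_INLINE float DegToRad(float degrees) {
    ASSERT(degrees >= -360.0f && degrees <= 360.0f); // illustrative precondition
    return degrees * Math::DegreesToRadians;
}
```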
================================================
FILE: MotionCorrection/src/cpp/Math/Matrix.cpp
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#include "Matrix.h"

#include <cfloat>

using namespace Math;

namespace {
    static bool CheckForZeroScaleInRow(float scale, const Vector& row) {
        float const absScale = Math::Abs(scale);
        for (int i = 0; i < 3; i++) {
            if (absScale < 1 && Math::Abs(row[i]) >= FLT_MAX * absScale) {
                return false;
            }
        }
        return true;
    }

    static bool ExtractAndRemoveScalingAndShear(Matrix& matrix, Vector& scale, Vector& shear) {
        scale = Vector::Zero;
        shear = Vector::Zero;
        Float3 scaleValues = Float3::Zero;
        Float3 shearValues = Float3::Zero;

        // This implementation follows the technique described in the paper by
        // Spencer W. Thomas in the Graphics Gems II article: "Decomposing a
        // Matrix into Simple Transformations", p. 320.
        Vector row[3];
        row[0] = Vector(matrix[0][0], matrix[0][1], matrix[0][2]);
        row[1] = Vector(matrix[1][0], matrix[1][1], matrix[1][2]);
        row[2] = Vector(matrix[2][0], matrix[2][1], matrix[2][2]);

        float maxVal = 0;
        for (int i = 0; i < 3; i++) {
            for (int j = 0; j < 3; j++) {
                if (Math::Abs(row[i][j]) > maxVal) {
                    maxVal = Math::Abs(row[i][j]);
                }
            }
        }

        // We normalize the 3x3 matrix here.
        // It was noticed that this can improve numerical stability significantly,
        // especially when many of the upper 3x3 matrix's coefficients are very
        // close to zero; we correct for this step at the end by multiplying the
        // scaling factors by maxVal at the end (shear and rotation are not
        // affected by the normalization).
        if (maxVal != 0) {
            for (int i = 0; i < 3; i++) {
                if (!CheckForZeroScaleInRow(maxVal, row[i])) {
                    return false;
                } else {
                    row[i] /= maxVal;
                }
            }
        }

        // Compute X scale factor.
        scaleValues.m_x = row[0].Length3().ToFloat();
        if (!CheckForZeroScaleInRow(scaleValues.m_x, row[0])) {
            return false;
        }

        // Normalize first row.
        row[0] /= scaleValues.m_x;

        // An XY shear factor will shear the X coord. as the Y coord. changes.
        // There are 6 combinations (XY, XZ, YZ, YX, ZX, ZY), although we only
        // extract the first 3 because we can effect the last 3 by shearing in
        // XY, XZ, YZ combined rotations and scales.
        //
        // shear matrix <  1, YX, ZX, 0,
        //                XY,  1, ZY, 0,
        //                XZ, YZ,  1, 0,
        //                 0,  0,  0, 1 >

        // Compute XY shear factor and make 2nd row orthogonal to 1st.
        shearValues[0] = Vector::Dot3(row[0], row[1]).ToFloat();
        row[1] -= row[0] * shearValues[0];

        // Now, compute Y scale.
        scaleValues.m_y = row[1].Length3().ToFloat();
        if (!CheckForZeroScaleInRow(scaleValues.m_y, row[1])) {
            return false;
        }

        // Normalize 2nd row and correct the XY shear factor for Y scaling.
        row[1] /= scaleValues.m_y;
        shearValues[0] /= scaleValues.m_y;

        // Compute XZ and YZ shears, orthogonalize 3rd row.
        shearValues[1] = Vector::Dot3(row[0], row[2]).ToFloat();
        row[2] -= row[0] * shearValues[1];
        shearValues[2] = Vector::Dot3(row[1], row[2]).ToFloat();
        row[2] -= row[1] * shearValues[2];

        // Next, get Z scale.
        scaleValues.m_z = row[2].Length3().ToFloat();
        if (!CheckForZeroScaleInRow(scaleValues.m_z, row[2])) {
            return false;
        }

        // Normalize 3rd row and correct the XZ and YZ shear factors for Z scaling.
        row[2] /= scaleValues.m_z;
        shearValues[1] /= scaleValues.m_z;
        shearValues[2] /= scaleValues.m_z;

        // At this point, the upper 3x3 matrix in mat is orthonormal.
        // Check for a coordinate system flip. If the determinant
        // is less than zero, then negate the matrix and the scaling factors.
        if (Vector::Dot3(row[0], Vector::Cross3(row[1], row[2])).ToFloat() < 0) {
            for (int i = 0; i < 3; i++) {
                scaleValues[i] *= -1;
                row[i] *= -1;
            }
        }

        // Copy over the orthonormal rows into the returned matrix.
        // The upper 3x3 matrix in mat is now a rotation matrix.
for (int i = 0; i < 3; i++) { matrix[i].SetX(row[i][0]); matrix[i].SetY(row[i][1]); matrix[i].SetZ(row[i][2]); } // Correct the scaling factors for the normalization step that we // performed above; shear and rotation are not affected by the // normalization. scaleValues *= maxVal; scale = Vector(scaleValues); shear = Vector(shearValues); return true; } } namespace Math { Matrix const Matrix::Identity(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1); Matrix::Matrix(float v00, float v01, float v02, float v03, float v10, float v11, float v12, float v13, float v20, float v21, float v22, float v23, float v30, float v31, float v32, float v33) { m_rows[0] = Vector(v00, v01, v02, v03); m_rows[1] = Vector(v10, v11, v12, v13); m_rows[2] = Vector(v20, v21, v22, v23); m_rows[3] = Vector(v30, v31, v32, v33); } Matrix::Matrix(float values[16]) { m_rows[0] = Vector(values[0], values[1], values[2], values[3]); m_rows[1] = Vector(values[4], values[5], values[6], values[7]); m_rows[2] = Vector(values[8], values[9], values[10], values[11]); m_rows[3] = Vector(values[12], values[13], values[14], values[15]); } Matrix::Matrix(const Vector& xAxis, const Vector& yAxis, const Vector& zAxis) { m_rows[0] = xAxis; m_rows[1] = yAxis; m_rows[2] = zAxis; m_rows[3] = Vector::UnitW; } Matrix::Matrix(const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector& translation) { m_rows[0] = xAxis; m_rows[1] = yAxis; m_rows[2] = zAxis; m_rows[3] = translation.GetWithW1(); } Matrix::Matrix(const EulerAngles& eulerAngles, const Vector translation) { float cx, cy, cz, sx, sy, sz, czsx, cxcz, sysz; sx = sinf((float)eulerAngles.m_x); cx = cosf((float)eulerAngles.m_x); sy = sinf((float)eulerAngles.m_y); cy = cosf((float)eulerAngles.m_y); sz = sinf((float)eulerAngles.m_z); cz = cosf((float)eulerAngles.m_z); czsx = cz * sx; cxcz = cx * cz; sysz = sy * sz; // Order is XYZ m_values[0][0] = cy * cz; m_values[0][1] = cy * sz; m_values[0][2] = -sy; m_values[1][0] = czsx * sy - cx * sz; m_values[1][1] = cxcz + sx * sysz; m_values[1][2] = cy * sx; m_values[2][0] = cxcz * sy + sx * sz; m_values[2][1] = -czsx + cx * sysz; m_values[2][2] = cx * cy; m_values[0][3] = 0.0f; m_values[1][3] = 0.0f; m_values[2][3] = 0.0f; // Translation m_rows[3] = translation.GetWithW1(); } EulerAngles Matrix::ToEulerAngles() const { EulerAngles result; result.m_x = Radians(Math::ATan2(m_values[1][2], m_values[2][2])); float const c2 = Math::Sqrt((m_values[0][0] * m_values[0][0]) + (m_values[0][1] * m_values[0][1])); result.m_y = Radians(Math::ATan2(-m_values[0][2], c2)); float const s1 = Math::Sin((float)result.m_x); float const c1 = Math::Cos((float)result.m_x); result.m_z = Radians(Math::ATan2((s1 * m_values[2][0]) - (c1 * m_values[1][0]), (c1 * m_values[1][1]) - (s1 * m_values[2][1]))); return result; } bool Matrix::Decompose(Quaternion& outRotation, Vector& outTranslation, Vector& outScale) const { Matrix copy = *this; Vector shr = Vector::Zero; outScale = Vector::Zero; // Extract and remove scale and shear from matrix if (ExtractAndRemoveScalingAndShear(copy, outScale, shr)) { // Extract rotation and translation from unscaled matrix outRotation = copy.GetRotation(); outTranslation = copy.GetTranslation().GetWithW0(); return true; } return false; } Vector Matrix::GetScale() const { Matrix copy = *this; Vector scale = Vector::Zero, shear; if (!ExtractAndRemoveScalingAndShear(copy, scale, shear)) { float const lengthX = m_rows[0].Length3().ToFloat(); float const lengthY = m_rows[1].Length3().ToFloat(); float const lengthZ = 
m_rows[2].Length3().ToFloat(); scale = Vector(lengthX, lengthY, lengthZ, 0.0f); } return scale; } Matrix& Matrix::SetScale(const Vector& newScale) { Vector scale, shear; bool result = ExtractAndRemoveScalingAndShear(*this, scale, shear); // Cannot set scale on matrix that contains zero-scale ASSERT(result); m_rows[0] = m_rows[0] * newScale.GetSplatX(); m_rows[1] = m_rows[1] * newScale.GetSplatY(); m_rows[2] = m_rows[2] * newScale.GetSplatZ(); return *this; } Matrix& Matrix::RemoveScale() { Vector scale, shear; bool result = ExtractAndRemoveScalingAndShear(*this, scale, shear); // Cannot remove zero scale from matrix ASSERT(result); return *this; } } ================================================ FILE: MotionCorrection/src/cpp/Math/Matrix.h ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Vector.h" #include "Quaternion.h" enum class CoordinateSpace : uint8_t { World, Local, }; // // Matrices are Row-Major // Multiplication order is right to left // ObjectWorldTransform = LocalObjectTransform * WorldTransform // namespace Math { class alignas(16) Matrix { public: static Matrix const Identity; public: static Matrix FromRotation(const Quaternion& rotation); static Matrix FromTranslation(const Vector& translation); static Matrix FromScale(const Vector& scale); static Matrix FromUniformScale(float uniformScale); static Matrix FromTranslationAndScale(const Vector& translation, const Vector& scale); static Matrix FromRotationBetweenVectors(const Vector sourceVector, const Vector targetVector); public: explicit Matrix(); explicit Matrix(NoInit_t); explicit Matrix(ZeroInit_t); explicit Matrix(float v00, float v01, float v02, float v03, float v10, float v11, float v12, float v13, float v20, float v21, float v22, float v23, float v30, float v31, float v32, float v33); explicit Matrix(float values[16]); explicit Matrix(Vector const& xAxis, Vector const& yAxis, Vector const& zAxis); explicit Matrix(Vector const& xAxis, Vector const& yAxis, Vector const& zAxis, Vector const& translation); Matrix(const Vector axis, Radians angleRadians); Matrix(const AxisAngle axisAngle); explicit Matrix(const Quaternion& rotation); explicit Matrix(const Quaternion& rotation, const Vector& translation, const Vector& scale = Vector::One); explicit Matrix(const Quaternion& rotation, const Vector& translation, float scale = 1.0f); explicit Matrix(const EulerAngles& eulerAngles, const Vector translation = Vector::UnitW); EulerAngles ToEulerAngles() const; float* AsFloatArray(); const float* AsFloatArray() const; const Vector& GetRow(uint32_t row) const; const Vector& GetAxisX() const; const Vector& GetAxisY() const; const Vector& GetAxisZ() const; void SetAxisX(const Vector& xAxis); void SetAxisY(const Vector& yAxis); void SetAxisZ(const Vector& zAxis); Float3 GetForwardVector() const; Float3 GetRightVector() const; Float3 GetUpVector() const; Vector GetUnitAxisX() const; Vector GetUnitAxisY() const; Vector GetUnitAxisZ() const; bool IsIdentity() const; bool IsOrthogonal() const; bool IsOrthonormal() const; bool Decompose(Quaternion& outRotation, Vector& outTranslation, Vector& outScale) const; Matrix& Transpose(); Matrix GetTransposed() const; Matrix& Invert(); Matrix GetInverse() const; Vector GetDeterminant() const; float GetDeterminantAsFloat() const; Vector GetTranslation() const; const Vector& GetTranslationWithW() const; Matrix& SetTranslation(Vector 
const& v); Matrix& SetTranslation(Float3 const& v); Matrix& SetTranslation(Float4 const& v); Quaternion GetRotation() const; Matrix& SetRotation(const Matrix& rotation); Matrix& SetRotation(const Quaternion& rotation); Matrix& SetRotationMaintainingScale(const Matrix& rotation); Matrix& SetRotationMaintainingScale(const Quaternion& rotation); Vector GetScale() const; Matrix& RemoveScale(); Matrix& SetScale(const Vector& scale); Matrix& SetScale(float uniformScale); Matrix& RemoveScaleFast(); Matrix& SetScaleFast(const Vector& scale); Matrix& SetScaleFast(float uniformScale); // // Operators // // Applies rotation and scale to a vector and returns a result with the W = 0 Vector RotateVector(const Vector& vector) const; // Applies rotation and scale to a vector and returns a result with the W = 0 Vector TransformNormal(const Vector& vector) const; // Applies the transformation to a given point and ensures the resulting W = 1 Vector TransformPoint(const Vector& point) const; // Applies the transformation to a vector ignoring the W value. // Same as TransformPoint with the result W left unchanged Vector TransformVector3(const Vector& vector) const; // Applies the transformation to a given vector with the result W left unchanged Vector TransformVector4(const Vector& vector) const; Vector& operator[](uint32_t i); const Vector operator[](uint32_t i) const; Matrix operator*(const Matrix& rhs) const; Matrix& operator*=(const Matrix& rhs); Matrix operator*(const Quaternion& rhs) const; Matrix operator*=(const Quaternion& rhs); bool operator==(const Matrix& rhs) const; public: union { Vector m_rows[4]; float m_values[4][4]; }; }; } #include "Matrix.inl" ================================================ FILE: MotionCorrection/src/cpp/Math/Matrix.inl ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

#include <cstring>

#include "Matrix.h"

namespace Math
{

inline Matrix Matrix::FromRotation(const Quaternion& rotation) { return Matrix(rotation); }

inline Matrix Matrix::FromTranslation(const Vector& translation)
{
    Matrix M;
    M.m_rows[0] = Vector::UnitX;
    M.m_rows[1] = Vector::UnitY;
    M.m_rows[2] = Vector::UnitZ;
    M.m_rows[3] = translation.GetWithW1();
    return M;
}

inline Matrix Matrix::FromScale(const Vector& scale)
{
    Matrix M;
    M.m_rows[0] = _mm_and_ps(scale, SIMD::g_maskX000);
    M.m_rows[1] = _mm_and_ps(scale, SIMD::g_mask0Y00);
    M.m_rows[2] = _mm_and_ps(scale, SIMD::g_mask00Z0);
    M.m_rows[3] = Vector::UnitW;
    return M;
}

inline Matrix Matrix::FromUniformScale(float uniformScale)
{
    Matrix M;
    M.m_rows[0] = _mm_set_ps(0, 0, 0, uniformScale);
    M.m_rows[1] = _mm_set_ps(0, 0, uniformScale, 0);
    M.m_rows[2] = _mm_set_ps(0, uniformScale, 0, 0);
    M.m_rows[3] = Vector::UnitW;
    return M;
}

inline Matrix Matrix::FromTranslationAndScale(const Vector& translation, const Vector& scale)
{
    Matrix M;
    M.m_rows[0] = _mm_and_ps(scale, SIMD::g_maskX000);
    M.m_rows[1] = _mm_and_ps(scale, SIMD::g_mask0Y00);
    M.m_rows[2] = _mm_and_ps(scale, SIMD::g_mask00Z0);
    M.m_rows[3] = translation.GetWithW1();
    return M;
}

inline Matrix Matrix::FromRotationBetweenVectors(Vector const sourceVector, Vector const targetVector)
{
    return Matrix(Quaternion::FromRotationBetweenNormalizedVectors(sourceVector, targetVector));
}

inline Matrix::Matrix() { memcpy(this, &Matrix::Identity, sizeof(Matrix)); }

inline Matrix::Matrix(NoInit_t) { }

inline Matrix::Matrix(ZeroInit_t) { memset(this, 0, sizeof(Matrix)); }

inline Matrix::Matrix(const Vector axis, Radians angleRadians)
{
    Vector normal = axis.GetNormalized3();
    Vector C0, C1;
    Vector::SinCos(C0, C1, Vector((float)angleRadians));
    Vector C2 = Vector::One - C1;
    __m128 N0 = _mm_shuffle_ps(normal, normal, _MM_SHUFFLE(3, 0, 2, 1));
    __m128 N1 = _mm_shuffle_ps(normal, normal, _MM_SHUFFLE(3, 1, 0, 2));
    __m128 V0 = _mm_mul_ps(C2, N0);
    V0 = _mm_mul_ps(V0, N1);
    __m128 R0 = _mm_mul_ps(C2, normal);
    R0 = _mm_mul_ps(R0, normal);
    R0 = _mm_add_ps(R0, C1);
    __m128 R1 = _mm_mul_ps(C0, normal);
    R1 = _mm_add_ps(R1, V0);
    __m128 R2 = _mm_mul_ps(C0, normal);
    R2 = _mm_sub_ps(V0, R2);
    V0 = _mm_and_ps(R0, SIMD::g_maskXYZ0);
    __m128 V1 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(2, 1, 2, 0));
    V1 = _mm_shuffle_ps(V1, V1, _MM_SHUFFLE(0, 3, 2, 1));
    __m128 V2 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(0, 0, 1, 1));
    V2 = _mm_shuffle_ps(V2, V2, _MM_SHUFFLE(2, 0, 2, 0));
    R2 = _mm_shuffle_ps(V0, V1, _MM_SHUFFLE(1, 0, 3, 0));
    R2 = _mm_shuffle_ps(R2, R2, _MM_SHUFFLE(1, 3, 2, 0));
    m_rows[0] = R2;
    R2 = _mm_shuffle_ps(V0, V1, _MM_SHUFFLE(3, 2, 3, 1));
    R2 = _mm_shuffle_ps(R2, R2, _MM_SHUFFLE(1, 3, 0, 2));
    m_rows[1] = R2;
    V2 = _mm_shuffle_ps(V2, V0, _MM_SHUFFLE(3, 2, 1, 0));
    m_rows[2] = V2;
    m_rows[3] = Vector::UnitW;
}

inline Matrix::Matrix(const AxisAngle axisAngle) : Matrix(Vector(axisAngle.m_axis), axisAngle.m_angle) { }

inline Matrix::Matrix(const Quaternion& rotation)
{
    SetRotation(rotation);
    m_rows[3] = Vector::UnitW;
}

inline Matrix::Matrix(const Quaternion& rotation, const Vector& translation, const Vector& scale)
{
    SetRotation(rotation);
    m_rows[0] = m_rows[0] * scale.GetSplatX();
    m_rows[1] = m_rows[1] * scale.GetSplatY();
    m_rows[2] = m_rows[2] * scale.GetSplatZ();
    m_rows[3] = translation.GetWithW1();
}

inline Matrix::Matrix(const Quaternion& rotation, const Vector& translation, float scale) : Matrix(rotation, translation, Vector(scale)) { }

inline float* Matrix::AsFloatArray() { return &m_values[0][0]; }
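As a usage sketch of the constructors above together with Matrix::Decompose from Matrix.cpp (illustrative only; it assumes Radians and Vector construct from floats, as their use elsewhere in these headers suggests):

```cpp
#include "Math/Matrix.h"

// Illustrative round trip, not repository code: compose a rigid transform
// from a quaternion and a translation, then recover the pieces.
void DecomposeRoundTrip() {
    Math::Quaternion const rot(Math::Vector::UnitY, Math::Radians(Math::PiDivTwo));
    Math::Matrix const m(rot, Math::Vector(1.0f, 2.0f, 3.0f, 0.0f), 1.0f);

    Math::Quaternion outRot;
    Math::Vector outTranslation, outScale;
    if (m.Decompose(outRot, outTranslation, outScale)) {
        // outTranslation is ~(1, 2, 3) and outScale is ~(1, 1, 1);
        // outRot matches rot up to sign, since q and -q encode the same rotation.
    }
}
```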
inline const float* Matrix::AsFloatArray() const { return &m_values[0][0]; } inline const Vector& Matrix::GetRow(uint32_t row) const { return m_rows[row]; } inline const Vector& Matrix::GetAxisX() const { return m_rows[0]; } inline const Vector& Matrix::GetAxisY() const { return m_rows[1]; } inline const Vector& Matrix::GetAxisZ() const { return m_rows[2]; } inline void Matrix::SetAxisX(const Vector& xAxis) { m_rows[0] = xAxis; } inline void Matrix::SetAxisY(const Vector& yAxis) { m_rows[1] = yAxis; } inline void Matrix::SetAxisZ(const Vector& zAxis) { m_rows[2] = zAxis; } inline Float3 Matrix::GetForwardVector() const { return GetAxisZ(); } inline Float3 Matrix::GetRightVector() const { return GetAxisX(); } inline Float3 Matrix::GetUpVector() const { return GetAxisY(); } inline Vector Matrix::GetUnitAxisX() const { return m_rows[0].GetNormalized3(); } inline Vector Matrix::GetUnitAxisY() const { return m_rows[1].GetNormalized3(); } inline Vector Matrix::GetUnitAxisZ() const { return m_rows[2].GetNormalized3(); } inline bool Matrix::IsIdentity() const { __m128 vTemp1 = _mm_cmpeq_ps(m_rows[0], Vector::UnitX); __m128 vTemp2 = _mm_cmpeq_ps(m_rows[1], Vector::UnitY); __m128 vTemp3 = _mm_cmpeq_ps(m_rows[2], Vector::UnitZ); __m128 vTemp4 = _mm_cmpeq_ps(m_rows[3], Vector::UnitW); vTemp1 = _mm_and_ps(vTemp1, vTemp2); vTemp3 = _mm_and_ps(vTemp3, vTemp4); vTemp1 = _mm_and_ps(vTemp1, vTemp3); return (_mm_movemask_ps(vTemp1) == 0x0f); } inline bool Matrix::IsOrthogonal() const { Matrix const transpose = GetTransposed(); Matrix result = *this * transpose; return result.IsIdentity(); } inline bool Matrix::IsOrthonormal() const { static const Vector three(3); auto dotCheck = Vector::Dot3(m_rows[0], m_rows[1]) + Vector::Dot3(m_rows[0], m_rows[2]) + Vector::Dot3(m_rows[1], m_rows[2]); auto magnitudeCheck = m_rows[0].LengthSquared3() + m_rows[1].LengthSquared3() + m_rows[2].LengthSquared3(); auto result = dotCheck + magnitudeCheck; return result.IsNearEqual3(three); } inline Matrix& Matrix::Transpose() { __m128 vTemp1 = _mm_shuffle_ps(m_rows[0], m_rows[1], _MM_SHUFFLE(1, 0, 1, 0)); __m128 vTemp3 = _mm_shuffle_ps(m_rows[0], m_rows[1], _MM_SHUFFLE(3, 2, 3, 2)); __m128 vTemp2 = _mm_shuffle_ps(m_rows[2], m_rows[3], _MM_SHUFFLE(1, 0, 1, 0)); __m128 vTemp4 = _mm_shuffle_ps(m_rows[2], m_rows[3], _MM_SHUFFLE(3, 2, 3, 2)); m_rows[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); m_rows[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); m_rows[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); m_rows[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); return *this; } inline Matrix Matrix::GetTransposed() const { Matrix m = *this; m.Transpose(); return m; } inline Matrix& Matrix::Invert() { Matrix MT = GetTransposed(); __m128 V00 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(1, 1, 0, 0)); __m128 V10 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(3, 2, 3, 2)); __m128 V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(1, 1, 0, 0)); __m128 V11 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(3, 2, 3, 2)); __m128 V02 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[0], _MM_SHUFFLE(2, 0, 2, 0)); __m128 V12 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[1], _MM_SHUFFLE(3, 1, 3, 1)); __m128 D0 = _mm_mul_ps(V00, V10); __m128 D1 = _mm_mul_ps(V01, V11); __m128 D2 = _mm_mul_ps(V02, V12); V00 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(3, 2, 3, 2)); V10 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(1, 1, 0, 0)); V01 = 
_mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(3, 2, 3, 2)); V11 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(1, 1, 0, 0)); V02 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[0], _MM_SHUFFLE(3, 1, 3, 1)); V12 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[1], _MM_SHUFFLE(2, 0, 2, 0)); V00 = _mm_mul_ps(V00, V10); V01 = _mm_mul_ps(V01, V11); V02 = _mm_mul_ps(V02, V12); D0 = _mm_sub_ps(D0, V00); D1 = _mm_sub_ps(D1, V01); D2 = _mm_sub_ps(D2, V02); // V11 = D0Y,D0W,D2Y,D2Y V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 1, 3, 1)); V00 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(1, 0, 2, 1)); V10 = _mm_shuffle_ps(V11, D0, _MM_SHUFFLE(0, 3, 0, 2)); V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(0, 1, 0, 2)); V11 = _mm_shuffle_ps(V11, D0, _MM_SHUFFLE(2, 1, 2, 1)); // V13 = D1Y,D1W,D2W,D2W __m128 V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 3, 3, 1)); V02 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(1, 0, 2, 1)); V12 = _mm_shuffle_ps(V13, D1, _MM_SHUFFLE(0, 3, 0, 2)); __m128 V03 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(0, 1, 0, 2)); V13 = _mm_shuffle_ps(V13, D1, _MM_SHUFFLE(2, 1, 2, 1)); __m128 C0 = _mm_mul_ps(V00, V10); __m128 C2 = _mm_mul_ps(V01, V11); __m128 C4 = _mm_mul_ps(V02, V12); __m128 C6 = _mm_mul_ps(V03, V13); // V11 = D0X,D0Y,D2X,D2X V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(0, 0, 1, 0)); V00 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(2, 1, 3, 2)); V10 = _mm_shuffle_ps(D0, V11, _MM_SHUFFLE(2, 1, 0, 3)); V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(1, 3, 2, 3)); V11 = _mm_shuffle_ps(D0, V11, _MM_SHUFFLE(0, 2, 1, 2)); // V13 = D1X,D1Y,D2Z,D2Z V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(2, 2, 1, 0)); V02 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(2, 1, 3, 2)); V12 = _mm_shuffle_ps(D1, V13, _MM_SHUFFLE(2, 1, 0, 3)); V03 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(1, 3, 2, 3)); V13 = _mm_shuffle_ps(D1, V13, _MM_SHUFFLE(0, 2, 1, 2)); V00 = _mm_mul_ps(V00, V10); V01 = _mm_mul_ps(V01, V11); V02 = _mm_mul_ps(V02, V12); V03 = _mm_mul_ps(V03, V13); C0 = _mm_sub_ps(C0, V00); C2 = _mm_sub_ps(C2, V01); C4 = _mm_sub_ps(C4, V02); C6 = _mm_sub_ps(C6, V03); V00 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(0, 3, 0, 3)); // V10 = D0Z,D0Z,D2X,D2Y V10 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 0, 2, 2)); V10 = _mm_shuffle_ps(V10, V10, _MM_SHUFFLE(0, 2, 3, 0)); V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(2, 0, 3, 1)); // V11 = D0X,D0W,D2X,D2Y V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 0, 3, 0)); V11 = _mm_shuffle_ps(V11, V11, _MM_SHUFFLE(2, 1, 0, 3)); V02 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(0, 3, 0, 3)); // V12 = D1Z,D1Z,D2Z,D2W V12 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 2, 2, 2)); V12 = _mm_shuffle_ps(V12, V12, _MM_SHUFFLE(0, 2, 3, 0)); V03 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(2, 0, 3, 1)); // V13 = D1X,D1W,D2Z,D2W V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 2, 3, 0)); V13 = _mm_shuffle_ps(V13, V13, _MM_SHUFFLE(2, 1, 0, 3)); V00 = _mm_mul_ps(V00, V10); V01 = _mm_mul_ps(V01, V11); V02 = _mm_mul_ps(V02, V12); V03 = _mm_mul_ps(V03, V13); __m128 C1 = _mm_sub_ps(C0, V00); C0 = _mm_add_ps(C0, V00); __m128 C3 = _mm_add_ps(C2, V01); C2 = _mm_sub_ps(C2, V01); __m128 C5 = _mm_sub_ps(C4, V02); C4 = _mm_add_ps(C4, V02); __m128 C7 = _mm_add_ps(C6, V03); C6 = _mm_sub_ps(C6, V03); C0 = _mm_shuffle_ps(C0, C1, _MM_SHUFFLE(3, 1, 2, 0)); C2 = _mm_shuffle_ps(C2, C3, _MM_SHUFFLE(3, 1, 2, 0)); C4 = _mm_shuffle_ps(C4, C5, _MM_SHUFFLE(3, 1, 2, 0)); C6 = 
_mm_shuffle_ps(C6, C7, _MM_SHUFFLE(3, 1, 2, 0)); C0 = _mm_shuffle_ps(C0, C0, _MM_SHUFFLE(3, 1, 2, 0)); C2 = _mm_shuffle_ps(C2, C2, _MM_SHUFFLE(3, 1, 2, 0)); C4 = _mm_shuffle_ps(C4, C4, _MM_SHUFFLE(3, 1, 2, 0)); C6 = _mm_shuffle_ps(C6, C6, _MM_SHUFFLE(3, 1, 2, 0)); __m128 vTemp = Vector::Dot4(C0, MT.m_rows[0]); vTemp = _mm_div_ps(Vector::One, vTemp); m_rows[0] = _mm_mul_ps(C0, vTemp); m_rows[1] = _mm_mul_ps(C2, vTemp); m_rows[2] = _mm_mul_ps(C4, vTemp); m_rows[3] = _mm_mul_ps(C6, vTemp); return *this; } inline Matrix Matrix::GetInverse() const { Matrix m = *this; m.Invert(); return m; } inline Vector Matrix::GetDeterminant() const { Vector V0 = m_rows[2].Shuffle(1, 0, 0, 0); Vector V1 = m_rows[3].Shuffle(2, 2, 1, 1); Vector V2 = m_rows[2].Shuffle(1, 0, 0, 0); Vector V3 = m_rows[3].Shuffle(3, 3, 3, 2); Vector V4 = m_rows[2].Shuffle(2, 2, 1, 1); Vector V5 = m_rows[3].Shuffle(3, 3, 3, 2); Vector P0 = V0 * V1; Vector P1 = V2 * V3; Vector P2 = V4 * V5; V0 = m_rows[2].Shuffle(2, 2, 1, 1); V1 = m_rows[3].Shuffle(1, 0, 0, 0); V2 = m_rows[2].Shuffle(3, 3, 3, 2); V3 = m_rows[3].Shuffle(1, 0, 0, 0); V4 = m_rows[2].Shuffle(3, 3, 3, 2); V5 = m_rows[3].Shuffle(2, 2, 1, 1); P0 = Vector::NegativeMultiplySubtract(V0, V1, P0); P1 = Vector::NegativeMultiplySubtract(V2, V3, P1); P2 = Vector::NegativeMultiplySubtract(V4, V5, P2); V0 = m_rows[1].Shuffle(3, 3, 3, 2); V1 = m_rows[1].Shuffle(2, 2, 1, 1); V2 = m_rows[1].Shuffle(1, 0, 0, 0); static Vector const Sign(1.0f, -1.0f, 1.0f, -1.0f); Vector S = m_rows[0] * Sign; Vector R = V0 * P0; R = Vector::NegativeMultiplySubtract(V1, P1, R); R = Vector::MultiplyAdd(V2, P2, R); return Vector::Dot4(S, R); } inline float Matrix::GetDeterminantAsFloat() const { return GetDeterminant().GetX(); } inline Vector Matrix::GetTranslation() const { return m_rows[3].GetWithW0(); } inline const Vector& Matrix::GetTranslationWithW() const { return m_rows[3]; } inline Matrix& Matrix::SetTranslation(const Vector& v) { m_rows[3] = v.GetWithW1(); return *this; } inline Matrix& Matrix::SetTranslation(const Float3& v) { m_rows[3] = Vector(v, 1.0f); return *this; } inline Matrix& Matrix::SetTranslation(const Float4& v) { m_rows[3] = Vector(v.m_x, v.m_y, v.m_z, 1.0f); return *this; } inline Quaternion Matrix::GetRotation() const { // based on RTM: https://github.com/nfrechette/rtm const Vector& axisX = m_rows[0]; const Vector& axisY = m_rows[1]; const Vector& axisZ = m_rows[2]; // Zero scale is not supported if (axisX.IsNearZero4() || axisY.IsNearZero4() || axisZ.IsNearZero4()) { HALT(); } float const axisX_X = axisX.GetX(); float const axisY_Y = axisY.GetY(); float const axisZ_Z = axisZ.GetZ(); float const mtx_trace = axisX_X + axisY_Y + axisZ_Z; if (mtx_trace > 0.0) { float const axisX_y = axisX.GetY(); float const axisX_z = axisX.GetZ(); float const axisY_x = axisY.GetX(); float const axisY_z = axisY.GetZ(); float const axisZ_x = axisZ.GetX(); float const axisZ_y = axisZ.GetY(); float const inv_trace = Math::Reciprocal(Math::Sqrt(mtx_trace + 1.0f)); float const half_inv_trace = inv_trace * 0.5f; float const m_x = (axisY_z - axisZ_y) * half_inv_trace; float const m_y = (axisZ_x - axisX_z) * half_inv_trace; float const m_z = (axisX_y - axisY_x) * half_inv_trace; float const m_w = Math::Reciprocal(inv_trace) * 0.5f; return Quaternion(m_x, m_y, m_z, m_w).GetNormalized(); } else { // Find the axis with the highest diagonal value int32_t axisIdx0 = 0; if (axisY_Y > axisX_X) { axisIdx0 = 1; } if (axisZ_Z > m_rows[axisIdx0][axisIdx0]) { axisIdx0 = 2; } int32_t const axisIdx1 = (axisIdx0 + 1) % 3; 
int32_t const axisIdx2 = (axisIdx1 + 1) % 3; float const pseudoTrace = 1.0f + m_rows[axisIdx0][axisIdx0] - m_rows[axisIdx1][axisIdx1] - m_rows[axisIdx2][axisIdx2]; float const inversePseudoTrace = Math::Reciprocal(Math::Sqrt(pseudoTrace)); float const halfInversePseudoTrace = inversePseudoTrace * 0.5f; Float4 rawQuatValues; rawQuatValues[axisIdx0] = Math::Reciprocal(inversePseudoTrace) * 0.5f; rawQuatValues[axisIdx1] = halfInversePseudoTrace * (m_rows[axisIdx0][axisIdx1] + m_rows[axisIdx1][axisIdx0]); rawQuatValues[axisIdx2] = halfInversePseudoTrace * (m_rows[axisIdx0][axisIdx2] + m_rows[axisIdx2][axisIdx0]); rawQuatValues[3] = halfInversePseudoTrace * (m_rows[axisIdx1][axisIdx2] - m_rows[axisIdx2][axisIdx1]); return Quaternion(rawQuatValues).GetNormalized(); } } inline Matrix& Matrix::SetRotation(const Matrix& rotation) { ASSERT(Math::Abs(rotation.GetDeterminant().GetX()) == 1.0f); m_rows[0] = rotation.m_rows[0]; m_rows[1] = rotation.m_rows[1]; m_rows[2] = rotation.m_rows[2]; return *this; } inline Matrix& Matrix::SetRotation(const Quaternion& rotation) { static __m128 const constant1110 = { 1.0f, 1.0f, 1.0f, 0.0f }; __m128 Q0 = _mm_add_ps(rotation, rotation); __m128 Q1 = _mm_mul_ps(rotation, Q0); __m128 V0 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(3, 0, 0, 1)); V0 = _mm_and_ps(V0, SIMD::g_maskXYZ0); __m128 V1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(3, 1, 2, 2)); V1 = _mm_and_ps(V1, SIMD::g_maskXYZ0); __m128 R0 = _mm_sub_ps(constant1110, V0); R0 = _mm_sub_ps(R0, V1); V0 = _mm_shuffle_ps(rotation, rotation, _MM_SHUFFLE(3, 1, 0, 0)); V1 = _mm_shuffle_ps(Q0, Q0, _MM_SHUFFLE(3, 2, 1, 2)); V0 = _mm_mul_ps(V0, V1); V1 = _mm_shuffle_ps(rotation, rotation, _MM_SHUFFLE(3, 3, 3, 3)); __m128 V2 = _mm_shuffle_ps(Q0, Q0, _MM_SHUFFLE(3, 0, 2, 1)); V1 = _mm_mul_ps(V1, V2); __m128 R1 = _mm_add_ps(V0, V1); __m128 R2 = _mm_sub_ps(V0, V1); V0 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(1, 0, 2, 1)); V0 = _mm_shuffle_ps(V0, V0, _MM_SHUFFLE(1, 3, 2, 0)); V1 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(2, 2, 0, 0)); V1 = _mm_shuffle_ps(V1, V1, _MM_SHUFFLE(2, 0, 2, 0)); Q1 = _mm_shuffle_ps(R0, V0, _MM_SHUFFLE(1, 0, 3, 0)); Q1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(1, 3, 2, 0)); m_rows[0] = Q1; Q1 = _mm_shuffle_ps(R0, V0, _MM_SHUFFLE(3, 2, 3, 1)); Q1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(1, 3, 0, 2)); m_rows[1] = Q1; Q1 = _mm_shuffle_ps(V1, R0, _MM_SHUFFLE(3, 2, 1, 0)); m_rows[2] = Q1; return *this; } inline Matrix& Matrix::SetRotationMaintainingScale(const Matrix& rotation) { Vector const scale = GetScale(); SetRotation(rotation); return SetScale(scale); } inline Matrix& Matrix::SetRotationMaintainingScale(const Quaternion& rotation) { Vector const scale = GetScale(); SetRotation(rotation); return SetScale(scale); } inline Matrix& Matrix::SetScale(float uniformScale) { SetScale(Vector(uniformScale)); return *this; } inline Matrix& Matrix::RemoveScaleFast() { m_rows[0] = m_rows[0].GetNormalized4(); m_rows[1] = m_rows[1].GetNormalized4(); m_rows[2] = m_rows[2].GetNormalized4(); return *this; } inline Matrix& Matrix::SetScaleFast(const Vector& scale) { m_rows[0] = m_rows[0].GetNormalized3() * scale.GetSplatX(); m_rows[1] = m_rows[1].GetNormalized3() * scale.GetSplatY(); m_rows[2] = m_rows[2].GetNormalized3() * scale.GetSplatZ(); return *this; } inline Matrix& Matrix::SetScaleFast(float uniformScale) { SetScaleFast(Vector(uniformScale)); return *this; } inline Vector Matrix::RotateVector(const Vector& vector) const { Vector const X = vector.GetSplatX(); Vector const Y = vector.GetSplatY(); Vector const Z = vector.GetSplatZ(); Vector Result 
= Z * m_rows[2]; Result = Vector::MultiplyAdd(Y, m_rows[1], Result); Result = Vector::MultiplyAdd(X, m_rows[0], Result); return Result; } inline Vector Matrix::TransformNormal(const Vector& vector) const { return RotateVector(vector); } inline Vector Matrix::TransformPoint(const Vector& point) const { Vector const X = point.GetSplatX(); Vector const Y = point.GetSplatY(); Vector const Z = point.GetSplatZ(); Vector result = Vector::MultiplyAdd(Z, m_rows[2], m_rows[3]); result = Vector::MultiplyAdd(Y, m_rows[1], result); result = Vector::MultiplyAdd(X, m_rows[0], result); Vector const W = result.GetSplatW(); return result / W; } inline Vector Matrix::TransformVector3(const Vector& V) const { Vector const X = V.GetSplatX(); Vector const Y = V.GetSplatY(); Vector const Z = V.GetSplatZ(); Vector result = Vector::MultiplyAdd(Z, m_rows[2], m_rows[3]); result = Vector::MultiplyAdd(Y, m_rows[1], result); result = Vector::MultiplyAdd(X, m_rows[0], result); return result; } inline Vector Matrix::TransformVector4(const Vector& V) const { // Splat m_x,m_y,m_z and m_w Vector vTempX = V.GetSplatX(); Vector vTempY = V.GetSplatY(); Vector vTempZ = V.GetSplatZ(); Vector vTempW = V.GetSplatW(); // Mul by the matrix vTempX = _mm_mul_ps(vTempX, m_rows[0]); vTempY = _mm_mul_ps(vTempY, m_rows[1]); vTempZ = _mm_mul_ps(vTempZ, m_rows[2]); vTempW = _mm_mul_ps(vTempW, m_rows[3]); // Add them all together vTempX = _mm_add_ps(vTempX, vTempY); vTempZ = _mm_add_ps(vTempZ, vTempW); vTempX = _mm_add_ps(vTempX, vTempZ); return vTempX; } inline Vector& Matrix::operator[](uint32_t i) { ASSERT(i < 4); return m_rows[i]; } inline const Vector Matrix::operator[](uint32_t i) const { ASSERT(i < 4); return m_rows[i]; } inline Matrix Matrix::operator*(const Matrix& rhs) const { Matrix result = *this; result *= rhs; return result; } inline Matrix& Matrix::operator*= (const Matrix& rhs) { Vector vX, vY, vZ, vW; // Use vW to hold the original row vW = m_rows[0]; vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); vW = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); vX = _mm_mul_ps(vX, rhs.m_rows[0]); vY = _mm_mul_ps(vY, rhs.m_rows[1]); vZ = _mm_mul_ps(vZ, rhs.m_rows[2]); vW = _mm_mul_ps(vW, rhs.m_rows[3]); vX = _mm_add_ps(vX, vZ); vY = _mm_add_ps(vY, vW); vX = _mm_add_ps(vX, vY); m_rows[0] = vX; // Repeat for the other 3 rows vW = m_rows[1]; vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); vW = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); vX = _mm_mul_ps(vX, rhs.m_rows[0]); vY = _mm_mul_ps(vY, rhs.m_rows[1]); vZ = _mm_mul_ps(vZ, rhs.m_rows[2]); vW = _mm_mul_ps(vW, rhs.m_rows[3]); vX = _mm_add_ps(vX, vZ); vY = _mm_add_ps(vY, vW); vX = _mm_add_ps(vX, vY); m_rows[1] = vX; vW = m_rows[2]; vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); vW = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); vX = _mm_mul_ps(vX, rhs.m_rows[0]); vY = _mm_mul_ps(vY, rhs.m_rows[1]); vZ = _mm_mul_ps(vZ, rhs.m_rows[2]); vW = _mm_mul_ps(vW, rhs.m_rows[3]); vX = _mm_add_ps(vX, vZ); vY = _mm_add_ps(vY, vW); vX = _mm_add_ps(vX, vY); m_rows[2] = vX; vW = m_rows[3]; vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); vW = 
_mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); vX = _mm_mul_ps(vX, rhs.m_rows[0]); vY = _mm_mul_ps(vY, rhs.m_rows[1]); vZ = _mm_mul_ps(vZ, rhs.m_rows[2]); vW = _mm_mul_ps(vW, rhs.m_rows[3]); vX = _mm_add_ps(vX, vZ); vY = _mm_add_ps(vY, vW); vX = _mm_add_ps(vX, vY); m_rows[3] = vX; return *this; } inline Matrix Matrix::operator*(const Quaternion& rhs) const { return operator*(Matrix(rhs)); } inline Matrix Matrix::operator*=(const Quaternion& rhs) { return operator*=(Matrix(rhs)); } inline bool Matrix::operator==(const Matrix& rhs) const { for (auto i = 0; i < 4; i++) { for (auto j = 0; j < 4; j++) { if (m_values[i][j] != rhs.m_values[i][j]) { return false; } } } return true; } } ================================================ FILE: MotionCorrection/src/cpp/Math/Quaternion.cpp ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #include "Quaternion.h" #include "Matrix.h" namespace Math { Quaternion const Quaternion::Identity(0, 0, 0, 1); // Rotation order is XYZ EulerAngles Quaternion::ToEulerAngles() const { return Matrix(*this).ToEulerAngles(); } Quaternion Quaternion::LookRotation(const Vector& forward, const Vector& up) { const Vector t = Vector::Cross3(up, forward).Normalize3(); return Matrix(t, Vector::Cross3(forward, t), forward).GetRotation(); } } ================================================ FILE: MotionCorrection/src/cpp/Math/Quaternion.h ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Vector.h" namespace Math { class alignas(16) Quaternion { public: static Quaternion const Identity; // Calculate the rotation required to align the source vector to the target vector (shortest path) static Quaternion FromRotationBetweenNormalizedVectors(const Vector& sourceVector, const Vector& targetVector); // Calculate the rotation required to align one vector onto another but also taking account a fallback rotation axis for opposite parallel vectors static Quaternion FromRotationBetweenNormalizedVectors(const Vector& sourceVector, const Vector& targetVector, const Vector& fallbackRotationAxis); // Calculate the rotation required to align the source vector to the target vector (shortest path) static Quaternion FromRotationBetweenVectors(const Vector& sourceVector, const Vector& targetVector); // Normalized LERP - not accurate - only use for really short distances static Quaternion NLerp(const Quaternion& from, const Quaternion& to, float t); // Standard and accurate Spherical LERP - based on DirectX Math static Quaternion SLerp(const Quaternion& from, const Quaternion& to, float t); // Fast approximation of a Spherical LERP - based on "A fast and accurate estimate for SLERP" by David Eberly static Quaternion FastSLerp(const Quaternion& from, const Quaternion& to, float t); // Spherical quadrangle/cubic interpolation for quaternions static Quaternion SQuad(const Quaternion& q0, const Quaternion& q1, const Quaternion& q2, const Quaternion& q3, float t); // Calculate the shortest delta quaternion needed to rotate 'from' onto 'to' static Quaternion Delta(const Quaternion& from, const Quaternion& to); // Simple vector dot product between two quaternions static Vector Dot(const Quaternion& q0, const Quaternion& q1); // Calculate the angular distance between two quaternions static Radians 
Distance(const Quaternion& q0, const Quaternion& q1);

    // Calculate look rotation given forward and up vectors
    static Quaternion LookRotation(const Vector& forward, const Vector& up);

public:
    Quaternion() = default;
    explicit Quaternion(NoInit_t);
    explicit Quaternion(IdentityInit_t);
    explicit Quaternion(const Vector& v);
    explicit Quaternion(float ix, float iy, float iz, float iw);
    explicit Quaternion(const Float4& v);
    explicit Quaternion(const Vector& axis, Radians angle);
    explicit Quaternion(AxisAngle axisAngle);
    explicit Quaternion(const EulerAngles& eulerAngles);
    explicit Quaternion(Radians rotX, Radians rotY, Radians rotZ);

    operator __m128& ();
    operator const __m128& () const;

    Float4 ToFloat4() const;
    Vector ToVector() const;
    Vector Length();
    float GetLength() const;

    // Get the angle this rotation represents around the specified axis
    Radians GetAngle() const;
    AxisAngle ToAxisAngle() const;
    EulerAngles ToEulerAngles() const;

    Vector RotateVector(const Vector& vector) const;
    Vector RotateVectorInverse(const Vector& vector) const;

    Quaternion& Conjugate();
    Quaternion GetConjugate() const;
    Quaternion& Negate();
    Quaternion GetNegated() const;
    Quaternion& Invert();
    Quaternion GetInverse() const;
    Quaternion& Normalize();
    Quaternion GetNormalized() const;

    Vector XAxis() const noexcept;
    Vector YAxis() const noexcept;
    Vector ZAxis() const noexcept;

    // Ensure that this rotation is the shortest in terms of the angle (i.e. -5 instead of 355)
    Quaternion& MakeShortestPath();
    // Ensure that this rotation is the shortest in terms of the angle (i.e. -5 instead of 355)
    Quaternion GetShortestPath() const;

    // This function will return the estimated normalized quaternion, this is not super accurate but a lot faster (use with care)
    Quaternion& NormalizeInaccurate();
    // This function will return the estimated normalized quaternion, this is not super accurate but a lot faster (use with care)
    Quaternion GetNormalizedInaccurate() const;

    bool IsNormalized() const;
    bool IsIdentity() const;

    // Concatenate the rotation of this onto rhs and return the result i.e. first rotate by rhs then by this
    // This means order of rotation is right-to-left: child-rotation * parent-rotation
    Quaternion operator*(const Quaternion& rhs) const;
    Quaternion& operator*=(const Quaternion& rhs);

    // Is the distance between this quaternion and another one under the threshold?
    bool IsNearEqual(const Quaternion& rhs, Radians const threshold = Math::DegreesToRadians) const;

    // Exact equality
    bool operator==(const Quaternion& rhs) const;
    // Exact inequality
    bool operator!=(const Quaternion& rhs) const;

private:
    Vector GetSplatW() const;
    float GetW() const;

    Quaternion& operator=(const Vector& v) = delete;

public:
    __m128 m_data;
};

static_assert(sizeof(Quaternion) == 16, "Quaternion size must be 16 bytes!");
}

#include "Quaternion.inl"

================================================
FILE: MotionCorrection/src/cpp/Math/Quaternion.inl
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Quaternion.h" namespace Math { inline Quaternion Quaternion::FromRotationBetweenNormalizedVectors(const Vector& from, const Vector& to) { ASSERT(from.IsNormalized3() && to.IsNormalized3()); Quaternion result; // Parallel vectors - return zero rotation Vector const dot = Vector::Dot3(from, to); if (dot.IsGreaterThanEqual4(Vector::OneMinusEpsilon)) { result = Quaternion::Identity; } // Opposite vectors - return 180 rotation around any orthogonal axis else if (dot.IsLessThanEqual4(Vector::EpsilonMinusOne)) { Float4 const fromValues = from.ToFloat4(); result = Quaternion(-fromValues.m_z, fromValues.m_y, fromValues.m_x, 0); result.Normalize(); } else // Calculate quaternion rotation { Vector const cross = Vector::Cross3(from, to); Vector Q = Vector::Select(cross, dot, Vector::Select0001); Q += Vector::Select(Vector::Zero, Q.Length4(), Vector::Select0001); result = Quaternion(Q); result.Normalize(); } return result; } inline Quaternion Quaternion::FromRotationBetweenNormalizedVectors(const Vector& from, const Vector& to, const Vector& fallbackRotationAxis) { ASSERT(from.IsNormalized3() && to.IsNormalized3()); Quaternion Q(NoInit); Vector rotationAxis = from.Cross3(to).GetNormalized3(); if (rotationAxis.GetLengthSquared3() == 0) { rotationAxis = fallbackRotationAxis; } float const dot = from.GetDot3(to); if (dot >= (1.0f - Math::Epsilon)) { Q = Quaternion::Identity; } else { float const angle = Math::ACos(dot); Q = Quaternion(rotationAxis, angle); } return Q; } inline Quaternion Quaternion::FromRotationBetweenVectors(const Vector& sourceVector, const Vector& targetVector) { return FromRotationBetweenNormalizedVectors( sourceVector.GetNormalized3(), targetVector.GetNormalized3()); } inline Quaternion Quaternion::NLerp(const Quaternion& from, const Quaternion& to, float T) { ASSERT(T >= 0.0f && T <= 1.0f); Quaternion adjustedFrom(from); // Ensure that the rotations are in the same direction if (Quaternion::Dot(from, to).IsLessThan4(Vector::Zero)) { adjustedFrom.Negate(); } Quaternion result(Vector::Lerp(adjustedFrom.ToVector(), to.ToVector(), T)); result.Normalize(); return result; } inline Quaternion Quaternion::SLerp(const Quaternion& from, const Quaternion& to, float T) { ASSERT(T >= 0.0f && T <= 1.0f); static SIMD::UIntMask const maskSign = { 0x80000000,0x00000000,0x00000000,0x00000000 }; static __m128 const oneMinusEpsilon = { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f }; Vector const VecT(T); Vector cosOmega = Quaternion::Dot(from, to); Vector control = cosOmega.LessThan(Vector::Zero); Vector sign = Vector::Select(Vector::One, Vector::NegativeOne, control); cosOmega = _mm_mul_ps(cosOmega, sign); control = cosOmega.LessThan(oneMinusEpsilon); Vector sinOmega = _mm_mul_ps(cosOmega, cosOmega); sinOmega = _mm_sub_ps(Vector::One, sinOmega); sinOmega = _mm_sqrt_ps(sinOmega); Vector omega = Vector::ATan2(sinOmega, cosOmega); Vector V01 = _mm_shuffle_ps(VecT, VecT, _MM_SHUFFLE(2, 3, 0, 1)); V01 = _mm_and_ps(V01, SIMD::g_maskXY00); V01 = _mm_xor_ps(V01, maskSign); V01 = _mm_add_ps(Vector::UnitX, V01); Vector S0 = _mm_mul_ps(V01, omega); S0 = Vector::Sin(S0); S0 = _mm_div_ps(S0, sinOmega); S0 = Vector::Select(V01, S0, control); Vector S1 = S0.GetSplatY(); S0 = S0.GetSplatX(); S1 = _mm_mul_ps(S1, sign); Vector result = _mm_mul_ps(from, S0); S1 = _mm_mul_ps(S1, to); result = _mm_add_ps(result, S1); return Quaternion(result); } inline Quaternion Quaternion::FastSLerp(const Quaternion& q0, const Quaternion& q1, float t) 
{ // Precomputed constants constexpr float const mu = 1.85298109240830f; static Vector const u0123 = _mm_setr_ps(1.f / (1 * 3), 1.f / (2 * 5), 1.f / (3 * 7), 1.f / (4 * 9)); static Vector const u4567 = _mm_setr_ps(1.f / (5 * 11), 1.f / (6 * 13), 1.f / (7 * 15), mu / (8 * 17)); static Vector const v0123 = _mm_setr_ps(1.f / 3, 2.f / 5, 3.f / 7, 4.f / 9); static Vector const v4567 = _mm_setr_ps(5.f / 11, 6.f / 13, 7.f / 15, mu * 8 / 17); static Vector const vSignMask = _mm_set1_ps(-0.f); // Common code for computing the scalar coefficients of SLERP auto CalculateCoefficient = [](Vector vT, Vector xm1) { Vector const vTSquared = vT * vT; // ( b4, b5, b6, b7 ) = ( x-1 ) * ( u4 * t^2 - v4, u5 * t^2 - v5, u6 * t^2 - v6, u7 * t^2 - v7 ) Vector b4567 = Vector::MultiplySubtract(u4567, vTSquared, v4567); b4567 *= xm1; // ( b7, b7, b7, b7 ) Vector b = b4567.GetSplatW(); Vector c = b + Vector::One; // ( b6, b6, b6, b6 ) b = b4567.GetSplatZ(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b5, b5, b5, b5 ) b = b4567.GetSplatY(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b4, b4, b4, b4 ) b = b4567.GetSplatX(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b0, b1, b2, b3 ) = // ( x-1)*(u0* t^2-v0, u1 * t^2 -v1, u2* t^2-v2, u3* t^2-v3 ) Vector b0123 = Vector::MultiplySubtract(u0123, vTSquared, v0123); b0123 *= xm1; // ( b3, b3, b3, b3 ) b = b0123.GetSplatW(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b2, b2, b2, b2 ) b = b0123.GetSplatZ(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b1, b1, b1, b1 ) b = b0123.GetSplatY(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b0, b0, b0, b0 ) b = b0123.GetSplatX(); c = Vector::MultiplyAdd(b, c, Vector::One); c *= vT; return c; }; Vector x = Vector::Dot4(q0.m_data, q1.m_data); // cos ( theta ) in all components Vector sign = _mm_and_ps(vSignMask, x); x = _mm_xor_ps(sign, x); Vector localQ1 = _mm_xor_ps(sign, q1); Vector xm1 = x - Vector::One; Vector cT = CalculateCoefficient(Vector(t), xm1); Vector cD = CalculateCoefficient(Vector(1.0f - t), xm1); cT = cT * localQ1; Quaternion result(Vector::MultiplyAdd(cD, q0.m_data, cT)); return result; } inline Quaternion Quaternion::SQuad(const Quaternion& q0, const Quaternion& q1, const Quaternion& q2, const Quaternion& q3, float t) { ASSERT(t >= 0.0f && t <= 1.0f); Quaternion const q03 = Quaternion::SLerp(q0, q3, t); Quaternion const q12 = Quaternion::SLerp(q1, q2, t); t = (t - (t * t)) * 2; Quaternion const result = Quaternion::SLerp(q03, q12, t); return result; } inline Quaternion Quaternion::Delta(const Quaternion& from, const Quaternion& to) { return to * from.GetInverse(); } inline Vector Quaternion::Dot(const Quaternion& q0, const Quaternion& q1) { return Vector::Dot4(q0.m_data, q1.m_data); } inline Radians Quaternion::Distance(const Quaternion& q0, const Quaternion& q1) { float const dot = Math::Clamp(Dot(q0, q1).ToFloat(), -1.0f, 1.0f); return Radians(2 * Math::ACos(Math::Abs(dot))); } inline Quaternion::Quaternion(NoInit_t) { } inline Quaternion::Quaternion(IdentityInit_t) : m_data(Vector::UnitW.m_data) { } inline Quaternion::Quaternion(const Vector& v) : m_data(v.m_data) { } inline Quaternion::Quaternion(float ix, float iy, float iz, float iw) { m_data = _mm_set_ps(iw, iz, iy, ix); } inline Quaternion::Quaternion(const Float4& v) : Quaternion(v.m_x, v.m_y, v.m_z, v.m_w) { } inline Quaternion::Quaternion(const Vector& axis, Radians angle) { ASSERT(axis.IsNormalized3()); auto N = _mm_and_ps(axis, SIMD::g_maskXYZ0); N = _mm_or_ps(N, Vector::UnitW); auto scale = _mm_set_ps1(0.5f * 
(float)angle); Vector sine, cosine; Vector::SinCos(sine, cosine, scale); scale = _mm_and_ps(sine, SIMD::g_maskXYZ0); cosine = _mm_and_ps(cosine, SIMD::g_mask000W); scale = _mm_or_ps(scale, cosine); N = _mm_mul_ps(N, scale); m_data = N; } inline Quaternion::Quaternion(AxisAngle axisAngle) : Quaternion(Vector(axisAngle.m_axis), axisAngle.m_angle) { } inline Quaternion::Quaternion(const EulerAngles& eulerAngles) { auto const rotationX = Quaternion(Vector::UnitX, eulerAngles.m_x); auto const rotationY = Quaternion(Vector::UnitY, eulerAngles.m_y); auto const rotationZ = Quaternion(Vector::UnitZ, eulerAngles.m_z); // Rotation order is XYZ - all in global space, hence the order is reversed m_data = (rotationX * rotationY * rotationZ).GetNormalized().m_data; } inline Quaternion::Quaternion(Radians rotX, Radians rotY, Radians rotZ) : Quaternion(EulerAngles(rotX, rotY, rotZ)) { } inline Quaternion::operator __m128& () { return m_data; } inline Quaternion::operator const __m128& () const { return m_data; } inline Float4 Quaternion::ToFloat4() const { Float4 v; _mm_storeu_ps(&v.m_x, m_data); return v; } inline Vector Quaternion::ToVector() const { return Vector(m_data); } inline Vector Quaternion::Length() { return ToVector().Length4(); } inline float Quaternion::GetLength() const { return ToVector().GetLength4(); } inline Radians Quaternion::GetAngle() const { return Radians(2.0f * Math::ACos(GetW())); } inline AxisAngle Quaternion::ToAxisAngle() const { return AxisAngle(ToVector(), Radians(2.0f * Math::ACos(GetW()))); } inline Vector Quaternion::RotateVector(const Vector& vector) const { Quaternion const A(Vector::Select(Vector::Select1110, vector, Vector::Select1110)); Quaternion const result = GetConjugate() * A; return (result * *this).ToVector(); } inline Vector Quaternion::RotateVectorInverse(const Vector& vector) const { Quaternion const A(Vector::Select(Vector::Select1110, vector, Vector::Select1110)); Quaternion const result = *this * A; return (result * GetConjugate()).ToVector(); } inline Quaternion& Quaternion::Conjugate() { static __m128 const conj = { -1.0f, -1.0f, -1.0f, 1.0f }; m_data = _mm_mul_ps(*this, conj); return *this; } inline Quaternion Quaternion::GetConjugate() const { Quaternion q = *this; q.Conjugate(); return q; } inline Quaternion& Quaternion::Negate() { m_data = _mm_mul_ps(*this, Vector::NegativeOne); return *this; } inline Quaternion Quaternion::GetNegated() const { Quaternion q = *this; q.Negate(); return q; } inline Quaternion& Quaternion::Invert() { Vector const conjugate(GetConjugate().m_data); Vector const length = ToVector().Length4(); Vector const mask = length.LessThanEqual(Vector::Epsilon); Vector const result = conjugate / length; m_data = result.Select(result, Vector::Zero, mask); return *this; } inline Quaternion Quaternion::GetInverse() const { Quaternion q = *this; q.Invert(); return q; } inline Quaternion& Quaternion::Normalize() { m_data = ToVector().GetNormalized4().m_data; return *this; } inline Quaternion Quaternion::GetNormalized() const { Quaternion q = *this; q.Normalize(); return q; } inline Vector Quaternion::XAxis() const noexcept { const float x = _mm_cvtss_f32(m_data); const float y = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1))); const float z = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2))); const float w = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3))); const float s = 2.0f * w; const float x2 = 2.0f * x; return Vector( x2 * x + s * w - 1.0f, x2 * y + s * z, x2 * z 
+ s * -y); } inline Vector Quaternion::YAxis() const noexcept { const float x = _mm_cvtss_f32(m_data); const float y = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1))); const float z = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2))); const float w = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3))); const float s = 2.0f * w; const float y2 = 2.0f * y; return Vector( y2 * x + s * -z, y2 * y + s * w - 1.0f, y2 * z + s * x); } inline Vector Quaternion::ZAxis() const noexcept { const float x = _mm_cvtss_f32(m_data); const float y = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1))); const float z = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2))); const float w = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3))); const float s = 2.0f * w; const float z2 = 2.0f * z; return Vector( x * z2 + s * y, y * z2 + s * -x, z * z2 + s * w - 1.0f); } inline Quaternion& Quaternion::MakeShortestPath() { // If we have a > 180 angle, negate // w < 0.0f is the same as dot( identity, q ) < 0 if (GetW() < 0.0f) { Negate(); } return *this; } inline Quaternion Quaternion::GetShortestPath() const { Quaternion sp = *this; sp.MakeShortestPath(); return sp; } inline Quaternion& Quaternion::NormalizeInaccurate() { *this = GetNormalizedInaccurate(); return *this; } inline Quaternion Quaternion::GetNormalizedInaccurate() const { __m128 vLengthSq = _mm_mul_ps(m_data, m_data); __m128 vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); vLengthSq = _mm_add_ps(vLengthSq, vTemp); vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); vLengthSq = _mm_add_ps(vLengthSq, vTemp); vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); // Get the reciprocal and mul to perform the normalization Quaternion result; result.m_data = _mm_rsqrt_ps(vLengthSq); result.m_data = _mm_mul_ps(result.m_data, m_data); return result; } inline bool Quaternion::IsNormalized() const { return ToVector().IsNormalized4(); } inline bool Quaternion::IsIdentity() const { return ToVector().IsEqual3(Vector::UnitW); } inline Quaternion Quaternion::operator*(const Quaternion& rhs) const { static const __m128 controlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f }; static const __m128 controlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f }; static const __m128 controlYXWZ = { -1.0f, 1.0f, 1.0f,-1.0f }; // Copy to SSE registers and use as few as possible for x86 __m128 Q2X = rhs; __m128 Q2Y = rhs; __m128 Q2Z = rhs; __m128 vResult = rhs; // Splat with one instruction vResult = _mm_shuffle_ps(vResult, vResult, _MM_SHUFFLE(3, 3, 3, 3)); Q2X = _mm_shuffle_ps(Q2X, Q2X, _MM_SHUFFLE(0, 0, 0, 0)); Q2Y = _mm_shuffle_ps(Q2Y, Q2Y, _MM_SHUFFLE(1, 1, 1, 1)); Q2Z = _mm_shuffle_ps(Q2Z, Q2Z, _MM_SHUFFLE(2, 2, 2, 2)); // Retire Q1 and perform Q1*Q2W vResult = _mm_mul_ps(vResult, *this); __m128 Q1Shuffle = *this; // Shuffle the copies of Q1 Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3)); // Mul by Q1WZYX Q2X = _mm_mul_ps(Q2X, Q1Shuffle); Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(2, 3, 0, 1)); // Flip the signs on m_y and m_z Q2X = _mm_mul_ps(Q2X, controlWZYX); // Mul by Q1ZWXY Q2Y = _mm_mul_ps(Q2Y, Q1Shuffle); Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3)); // Flip the signs on m_z and m_w Q2Y = _mm_mul_ps(Q2Y, controlZWXY); // Mul by Q1YXWZ Q2Z = _mm_mul_ps(Q2Z, Q1Shuffle); vResult = 
_mm_add_ps(vResult, Q2X);
    // Flip the signs on m_x and m_w
    Q2Z = _mm_mul_ps(Q2Z, controlYXWZ);
    Q2Y = _mm_add_ps(Q2Y, Q2Z);
    vResult = _mm_add_ps(vResult, Q2Y);
    return Quaternion(vResult);
}

inline Quaternion& Quaternion::operator*=(const Quaternion& rhs)
{
    *this = *this * rhs;
    return *this;
}

inline bool Quaternion::IsNearEqual(const Quaternion& rhs, Radians const threshold) const
{
    return Quaternion::Distance(*this, rhs) <= threshold;
}

inline bool Quaternion::operator==(const Quaternion& rhs) const { return ToVector() == rhs.ToVector(); }

inline bool Quaternion::operator!=(const Quaternion& rhs) const { return !operator==(rhs); }

inline Vector Quaternion::GetSplatW() const { return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3)); }

inline float Quaternion::GetW() const
{
    auto vTemp = GetSplatW();
    return _mm_cvtss_f32(vTemp);
}
}

================================================
FILE: MotionCorrection/src/cpp/Math/SIMD.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

#pragma once

#include <emmintrin.h> // SSE2 intrinsics
#include <cstdint>     // fixed-width integer types

namespace SIMD
{
    struct alignas( 16 ) IntMask
    {
        inline operator __m128( ) const { return reinterpret_cast<__m128 const&>( *this ); }
        inline operator __m128i( ) const { return _mm_castps_si128( *this ); }
        inline operator __m128d( ) const { return _mm_castps_pd( *this ); }

        int32_t i[4];
    };

    struct alignas( 16 ) UIntMask
    {
        inline operator __m128( ) const { return reinterpret_cast<__m128 const&>( *this ); }
        inline operator __m128i( ) const { return _mm_castps_si128( *this ); }
        inline operator __m128d( ) const { return _mm_castps_pd( *this ); }

        uint32_t v[4];
    };

    struct alignas( 16 ) FloatMask
    {
        inline operator __m128() const { return reinterpret_cast<__m128 const&>( *this ); }
        inline operator __m128i() const { return _mm_castps_si128( *this ); }
        inline operator __m128d() const { return _mm_castps_pd( *this ); }

        float v[4];
    };

    // Int Operations
    //-------------------------------------------------------------------------

    namespace Int
    {
        FORCE_INLINE bool Equal( __m128 V1, __m128 V2 )
        {
            __m128i vTemp = _mm_cmpeq_epi32( _mm_castps_si128( V1 ), _mm_castps_si128( V2 ) );
            return ( ( ( _mm_movemask_ps( _mm_castsi128_ps( vTemp ) ) & 7 ) == 7 ) != 0 );
        }

        FORCE_INLINE bool NotEqual( __m128 V1, __m128 V2 )
        {
            __m128i vTemp = _mm_cmpeq_epi32( _mm_castps_si128( V1 ), _mm_castps_si128( V2 ) );
            return ( ( _mm_movemask_ps( _mm_castsi128_ps( vTemp ) ) != 0xF ) != 0 );
        }

        FORCE_INLINE __m128 And( __m128 V1, __m128 V2 ) { return _mm_and_ps( V1, V2 ); }

        FORCE_INLINE __m128 Or( __m128 V1, __m128 V2 )
        {
            __m128i V = _mm_or_si128( _mm_castps_si128( V1 ), _mm_castps_si128( V2 ) );
            return _mm_castsi128_ps( V );
        }
    }

    //-------------------------------------------------------------------------

    static __m128 const g_sinCoefficients0 = { -0.16666667f, +0.0083333310f, -0.00019840874f, +2.7525562e-06f };
    static __m128 const g_sinCoefficients1 = { -2.3889859e-08f, -0.16665852f, +0.0083139502f, -0.00018524670f };
    static __m128 const g_cosCoefficients0 = { -0.5f, +0.041666638f, -0.0013888378f, +2.4760495e-05f };
    static __m128 const g_cosCoefficients1 = { -2.6051615e-07f, -0.49992746f, +0.041493919f, -0.0012712436f };
    static __m128 const g_tanCoefficients0 = { 1.0f, 0.333333333f, 0.133333333f, 5.396825397e-2f };
    static __m128 const g_tanCoefficients1 = { 2.186948854e-2f, 8.863235530e-3f, 3.592128167e-3f, 1.455834485e-3f };
    static __m128 const g_tanCoefficients2 = { 5.900274264e-4f,
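    // The tables above are minimax polynomial coefficients, stored four per
    // register in ascending odd/even powers. A scalar sketch of how a sin table
    // like this is typically consumed (a Horner evaluation in the style of
    // DirectXMath's XMScalarSin; assumes the input was already range-reduced to
    // [-Pi, Pi] -- illustrative only, not a function defined by this file):
    //
    //     inline float SinApproxScalar( float x )
    //     {
    //         float const x2 = x * x;
    //         float p = -2.3889859e-08f;       // g_sinCoefficients1[0]  (x^11)
    //         p = p * x2 + 2.7525562e-06f;     // g_sinCoefficients0[3]  (x^9)
    //         p = p * x2 - 0.00019840874f;     // g_sinCoefficients0[2]  (x^7)
    //         p = p * x2 + 0.0083333310f;      // g_sinCoefficients0[1]  (x^5)
    //         p = p * x2 - 0.16666667f;        // g_sinCoefficients0[0]  (x^3)
    //         return x * ( p * x2 + 1.0f );    // sin(x) ~ x + c3*x^3 + ... + c11*x^11
    //     }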
2.391290764e-4f, 9.691537707e-5f, 3.927832950e-5f }; static __m128 const g_arcCoefficients0 = { +1.5707963050f, -0.2145988016f, +0.0889789874f, -0.0501743046f }; static __m128 const g_arcCoefficients1 = { +0.0308918810f, -0.0170881256f, +0.0066700901f, -0.0012624911f }; static __m128 const g_aTanCoefficients0 = { -0.3333314528f, +0.1999355085f, -0.1420889944f, +0.1065626393f }; static __m128 const g_aTanCoefficients1 = { -0.0752896400f, +0.0429096138f, -0.0161657367f, +0.0028662257f }; static __m128 const g_aTanEstCoefficients0 = { +0.999866f, +0.999866f, +0.999866f, +0.999866f }; static __m128 const g_aTanEstCoefficients1 = { -0.3302995f, +0.180141f, -0.085133f, +0.0208351f }; static __m128 const g_tanEstCoefficients = { 2.484f, -1.954923183e-1f, 2.467401101f, Math::OneDivPi }; static __m128 const g_arcEstCoefficients = { +1.5707288f,-0.2121144f,+0.0742610f,-0.0187293f }; static __m128 const g_aTan2Constants = { Math::Pi, Math::PiDivTwo, Math::PiDivFour, 2.3561944905f /* 3/4 Pi */ }; //------------------------------------------------------------------------- static FloatMask const g_noFraction = { 8388608.0f,8388608.0f,8388608.0f,8388608.0f }; static IntMask const g_absMask = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; static UIntMask const g_trueMask = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; static UIntMask const g_signMask = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; static UIntMask const g_maskX000 = { 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 }; static UIntMask const g_mask0Y00 = { 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 }; static UIntMask const g_mask00Z0 = { 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 }; static UIntMask const g_mask000W = { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF }; static UIntMask const g_maskXY00 = { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 }; static UIntMask const g_maskXYZ0 = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 }; } ================================================ FILE: MotionCorrection/src/cpp/Math/Scalar.h ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 * SPDX-License-Identifier: Apache-2.0
 */

#pragma once

#include "Compiler.h"
#include "Debug.h"
#include "Constants.h"

#include <cmath>   // sinf, cosf, fabsf, powf, ...
#include <cstdlib> // abs, labs, llabs
#include <cstdint> // fixed-width integer types

//
// Scalar related methods
//

namespace Math
{
    FORCE_INLINE float Sin( float value ) { return sinf( value ); }
    FORCE_INLINE float Cos( float value ) { return cosf( value ); }
    FORCE_INLINE float Tan( float value ) { return tanf( value ); }
    FORCE_INLINE float ASin( float value ) { return asinf( value ); }
    FORCE_INLINE float ACos( float value ) { return acosf( value ); }
    FORCE_INLINE float ATan( float value ) { return atanf( value ); }
    FORCE_INLINE float ATan2( float y, float x ) { return atan2f( y, x ); }
    FORCE_INLINE float Cosec( float value ) { return 1.0f / sinf( value ); }
    FORCE_INLINE float Sec( float value ) { return 1.0f / cosf( value ); }
    FORCE_INLINE float Cot( float value ) { return 1.0f / tanf( value ); }
    FORCE_INLINE float Pow( float x, float y ) { return powf( x, y ); }
    FORCE_INLINE float Sqr( float value ) { return value * value; }
    FORCE_INLINE float Sqrt( float value ) { return sqrtf( value ); }
    FORCE_INLINE float Log( float value ) { return logf( value ); }
    FORCE_INLINE float Log2f( float value ) { return log2f( value ); }

    FORCE_INLINE float AddToMovingAverage( float currentAverage, uint64_t numCurrentSamples, float newValue )
    {
        return currentAverage + ( ( newValue - currentAverage ) / float( numCurrentSamples + 1 ) );
    }

    FORCE_INLINE float Abs( float a ) { return fabsf( a ); }
    FORCE_INLINE double Abs( double a ) { return fabs( a ); }
    FORCE_INLINE int8_t Abs( int8_t a ) { return (int8_t) abs( a ); }
    FORCE_INLINE int16_t Abs( int16_t a ) { return (int16_t) abs( a ); }
    FORCE_INLINE int32_t Abs( int32_t a ) { return labs( a ); }
    FORCE_INLINE int64_t Abs( int64_t a ) { return llabs( a ); }

    FORCE_INLINE float Reciprocal( float r ) { return 1.0f / r; }
    FORCE_INLINE double Reciprocal( double r ) { return 1.0 / r; }

    template <typename T> FORCE_INLINE T Min( T a, T b ) { return a <= b ? a : b; }
    template <typename T> FORCE_INLINE T Max( T a, T b ) { return a >= b ? a : b; }
    template <typename T> FORCE_INLINE T AbsMin( T a, T b ) { return Abs( a ) <= Abs( b ) ? a : b; }
    template <typename T> FORCE_INLINE T AbsMax( T a, T b ) { return Abs( a ) >= Abs( b ) ? a : b; }
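    // AddToMovingAverage() above is the standard incremental-mean update,
    // newAverage = average + ( sample - average ) / ( n + 1 ), which avoids
    // keeping a running sum. A small worked example (values purely illustrative):
    //
    //     float avg = 1.0f;                                  // first sample, n = 1
    //     avg = Math::AddToMovingAverage( avg, 1, 2.0f );    // -> 1.5f
    //     avg = Math::AddToMovingAverage( avg, 2, 3.0f );    // -> 2.0f == mean( 1, 2, 3 )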
    template <typename T> FORCE_INLINE T Sqrt( T a ) { return sqrt( a ); }

    template <typename T> FORCE_INLINE T Clamp( T value, T lowerBound, T upperBound )
    {
        ASSERT( lowerBound <= upperBound );
        return Min( Max( value, lowerBound ), upperBound );
    }

    template <typename T> FORCE_INLINE bool IsInRangeInclusive( T value, T lowerBound, T upperBound )
    {
        ASSERT( lowerBound < upperBound );
        return value >= lowerBound && value <= upperBound;
    }

    template <typename T> FORCE_INLINE bool IsInRangeExclusive( T value, T lowerBound, T upperBound )
    {
        ASSERT( lowerBound < upperBound );
        return value > lowerBound && value < upperBound;
    }

    // Decomposes a float into integer and fractional portions: the fractional part
    // is returned and the integer part is stored in 'integerPortion'
    FORCE_INLINE float ModF( float value, float& integerPortion ) { return modff( value, &integerPortion ); }

    // Returns the floating point remainder of x/y
    FORCE_INLINE float FModF( float x, float y ) { return fmodf( x, y ); }

    template <typename T> FORCE_INLINE T Lerp( T A, T B, float t ) { return A + ( B - A ) * t; }

    FORCE_INLINE float PercentageThroughRange( float value, float lowerBound, float upperBound )
    {
        ASSERT( lowerBound < upperBound );
        return ( Clamp( value, lowerBound, upperBound ) - lowerBound ) / ( upperBound - lowerBound );
    }

    FORCE_INLINE bool IsNearEqual( float value, float comparand, float epsilon = Epsilon ) { return fabsf( value - comparand ) <= epsilon; }
    FORCE_INLINE bool IsNearZero( float value, float epsilon = Epsilon ) { return fabsf( value ) <= epsilon; }
    FORCE_INLINE bool IsNearEqual( double value, double comparand, double epsilon = Epsilon ) { return fabs( value - comparand ) <= epsilon; }
    FORCE_INLINE bool IsNearZero( double value, double epsilon = Epsilon ) { return fabs( value ) <= epsilon; }

    FORCE_INLINE float Ceiling( float value ) { return ceilf( value ); }
    FORCE_INLINE int32_t CeilingToInt( float value ) { return (int32_t) ceilf( value ); }
    FORCE_INLINE float Floor( float value ) { return floorf( value ); }
    FORCE_INLINE int32_t FloorToInt( float value ) { return (int32_t) floorf( value ); }
    FORCE_INLINE float Round( float value ) { return roundf( value ); }
    FORCE_INLINE int32_t RoundToInt( float value ) { return (int32_t) roundf( value ); }

    inline float RemapRange( float value, float fromRangeBegin, float fromRangeEnd, float toRangeBegin, float toRangeEnd )
    {
        float const fromRangeLength = fromRangeEnd - fromRangeBegin;
        float const percentageThroughFromRange = Clamp( ( value - fromRangeBegin ) / fromRangeLength, 0.0f, 1.0f );
        float const toRangeLength = toRangeEnd - toRangeBegin;
        float const result = toRangeBegin + ( percentageThroughFromRange * toRangeLength );
        return result;
    }

    FORCE_INLINE float Square( float value ) { return value * value; }

    FORCE_INLINE float SmoothStep01( float value )
    {
        value = Clamp( value, 0.0f, 1.0f );
        return value * value * ( 3.0f - 2.0f * value );
    }
}

================================================
FILE: MotionCorrection/src/cpp/Math/Transform.cpp
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 */ #include "Transform.h" namespace Math { Transform const Transform::Identity = Transform(Quaternion(0, 0, 0, 1), Vector(0, 0, 0, 1), 1.0f); void Transform::SanitizeScaleValue() { if (Math::IsNearEqual(GetScale(), 1.0f, Math::LargeEpsilon)) { SetScale(1.0f); } } } ================================================ FILE: MotionCorrection/src/cpp/Math/Transform.h ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Matrix.h" namespace Math { // // VQS Transform // class Transform { public: static Transform const Identity; static Transform FromRotation(const Quaternion& rotation); static Transform FromTranslation(const Vector& translation); static Transform FromScale(float uniformScale); static Transform FromTranslationAndScale(const Vector& translation, float uniformScale); static Transform FromRotationBetweenVectors(const Vector sourceVector, const Vector targetVector); // Linearly interpolate between two transforms - uses NLerp for rotations static Transform Lerp(const Transform& from, const Transform& to, float t); // Spherically interpolate between two transforms - uses SLerp for rotations static Transform Slerp(const Transform& from, const Transform& to, float t); // Spherically interpolate between two transforms - uses FastSLerp (SLerp approximation) for rotations static Transform FastSlerp(const Transform& from, const Transform& to, float t); // Calculate a delta transform that you can concatenate to the // 'from' transform to get the 'to' transform. Properly handles the non-uniform scaling case. static Transform Delta(const Transform& from, const Transform& to); // Calculates a delta transform that you can concatenate to the // 'from' transform to get the 'to' transform (ignoring scale) static Transform DeltaNoScale(const Transform& from, const Transform& to); static void DirectlySetRotation(Transform& transform, Quaternion&& rotation); static void DirectlySetRotation(Transform& transform, const Quaternion& rotation); static void DirectlySetTranslationScale(Transform& transform, Vector&& translationScale); static void DirectlySetTranslationScale(Transform& transform, const Vector& translationScale); public: Transform() = default; explicit Transform(NoInit_t); explicit Transform(const Matrix& m); explicit Transform( const Quaternion& rotation, const Vector& translation = Vector(0, 0, 0, 0), float scale = 1.0f); explicit Transform(const AxisAngle& rotation); Matrix ToMatrix() const; Matrix ToMatrixNoScale() const; EulerAngles ToEulerAngles() const; Vector GetAxisX() const; Vector GetAxisY() const; Vector GetAxisZ() const; Vector GetRightVector() const; Vector GetForwardVector() const; Vector GetUpVector() const; bool IsIdentity() const; bool IsRigidTransform() const; void MakeRigidTransform(); // // Inverse and Deltas // // Invert this transform. // If you want a delta transform that you can // concatenate, then you should use the 'Delta' functions Transform& Inverse(); // Get the inverse of this transform. 
    // If you want a delta transform that you can
    // concatenate, then you should use the 'Delta' functions
    Transform GetInverse() const;

    // Return the delta required to reach a given target
    // transform (i.e., what do we need to add to reach that transform)
    Transform GetDeltaToOther(const Transform& targetTransform) const;

    // Return the delta relative to a given start
    // transform (i.e., how much do we differ from it)
    Transform GetDeltaFromOther(const Transform& startTransform) const;

    //
    // Rotation
    //

    const Quaternion& GetRotation() const;
    void SetRotation(const Quaternion& rotation);
    void AddRotation(const Quaternion& delta);

    //
    // Translation
    //

    // Get the translation for this transform
    // NOTE: you cannot rely on the W value as that will be the scale
    const Vector& GetTranslation() const;

    // Get the translation and scale for this transform
    const Vector& GetTranslationAndScale() const;

    // Set the translation
    void SetTranslation(const Vector& newTranslation);

    // Set the translation and scale simultaneously
    void SetTranslationAndScale(const Vector& newTranslationScale);

    // Add an offset to the current translation
    void AddTranslation(const Vector& translationDelta);

    // Get the translation as a homogeneous-coordinates vector (W=0)
    Vector GetTranslationAsVector() const;

    // Get the translation as a homogeneous-coordinates point (W=1)
    Vector GetTranslationAsPoint() const;

    //
    // Scale
    //

    float GetScale() const;
    Vector GetScaleVector() const;
    Vector GetInverseScaleVector() const;
    void SetScale(float uniformScale);
    bool HasScale() const;
    bool HasNegativeScale() const;

    // This function will sanitize the scale value to remove any
    // trailing digits from scale factors, i.e. 1.000000012 will be converted to 1.
    // This is primarily needed in import steps where scale values
    // might be sampled from curves or have multiple conversions applied, resulting in variance.
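    // A minimal illustrative usage sketch (hypothetical values): an imported
    // transform whose scale picked up floating-point noise is snapped back to
    // rigid by the sanitize call declared below:
    //
    //     Transform t( Quaternion::Identity, Vector::Zero, 1.000000012f );
    //     t.SanitizeScaleValue();          // scale within LargeEpsilon of 1 becomes exactly 1.0f
    //     ASSERT( t.IsRigidTransform() );  // now true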
    void SanitizeScaleValue();

    //
    // Transformations
    //

    Vector TranslateVector(const Vector& vector) const;
    Vector ScaleVector(const Vector& vector) const;
    Vector TransformPoint(const Vector& vector) const;
    Vector TransformPointNoScale(const Vector& vector) const;

    // Rotate a vector (same as TransformVectorNoScale)
    Vector RotateVector(const Vector& vector) const;

    // Rotate a vector (same as TransformVectorNoScale)
    Vector TransformNormal(const Vector& vector) const;

    // Unrotate a vector (same as InverseTransformVectorNoScale)
    Vector InverseRotateVector(const Vector& vector) const;

    // Inverts the operation order when doing the inverse transformation: first translation, then rotation, then scale
    Vector InverseTransformPoint(const Vector& point) const;

    // Inverts the operation order when doing the inverse transformation: first translation, then rotation
    Vector InverseTransformPointNoScale(const Vector& point) const;

    // Applies scale and rotation to a vector (no translation)
    Vector TransformVector(const Vector& vector) const;

    // Rotate a vector
    Vector TransformVectorNoScale(const Vector& vector) const;

    // Inverts the operation order when performing the inverse transformation: first rotation, then scale
    Vector InverseTransformVector(const Vector& vector) const;

    // Unrotate a vector
    Vector InverseTransformVectorNoScale(const Vector& vector) const;

    // WARNING: the results of multiplying transforms with shear or skew are ill-defined
    Transform operator*(const Transform& rhs) const;

    // WARNING: the results of multiplying transforms with shear or skew are ill-defined
    Transform& operator*=(const Transform& rhs);

    //
    // Operators
    //

    bool IsNearEqual(
        const Transform& rhs,
        const Radians angleThreshold = Math::DegreesToRadians,
        float translationScaleThreshold = Math::Epsilon) const;

    // Exact equality
    bool operator==(const Transform& rhs) const;
    bool operator!=(const Transform& rhs) const;

private:
    Quaternion m_rotation = Quaternion(0, 0, 0, 1);
    Vector m_translationScale = Vector(0, 0, 0, 1);
};
}

#include "Transform.inl"

================================================
FILE: MotionCorrection/src/cpp/Math/Transform.inl
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Transform.h" namespace Math { inline Transform Transform::FromRotation(const Quaternion& rotation) { return Transform(rotation); } inline Transform Transform::FromTranslation(const Vector& translation) { return Transform(Quaternion::Identity, translation); } inline Transform Transform::FromScale(float uniformScale) { return Transform(Quaternion::Identity, Vector::Zero, uniformScale); } inline Transform Transform::FromTranslationAndScale(const Vector& translation, float uniformScale) { return Transform(Quaternion::Identity, translation, uniformScale); } inline Transform Transform::FromRotationBetweenVectors(const Vector sourceVector, const Vector targetVector) { return Transform(Quaternion::FromRotationBetweenNormalizedVectors(sourceVector, targetVector)); } inline Transform Transform::Lerp(const Transform& from, const Transform& to, float t) { Quaternion const rotation = Quaternion::NLerp(Quaternion(from.m_rotation), Quaternion(to.m_rotation), t); Vector const translationAndScale = Vector::Lerp(from.m_translationScale, to.m_translationScale, t); Transform lerped(NoInit); lerped.m_rotation = rotation; lerped.m_translationScale = translationAndScale; return lerped; } inline Transform Transform::Slerp(const Transform& from, const Transform& to, float t) { Quaternion const rotation = Quaternion::SLerp(Quaternion(from.m_rotation), Quaternion(to.m_rotation), t); Vector const translationAndScale = Vector::Lerp(Vector(from.m_translationScale), Vector(to.m_translationScale), t); Transform lerped(NoInit); lerped.m_rotation = rotation; lerped.m_translationScale = translationAndScale; return lerped; } inline Transform Transform::FastSlerp(const Transform& from, const Transform& to, float t) { Quaternion const rotation = Quaternion::FastSLerp(Quaternion(from.m_rotation), Quaternion(to.m_rotation), t); Vector const translationAndScale = Vector::Lerp(Vector(from.m_translationScale), Vector(to.m_translationScale), t); Transform lerped(NoInit); lerped.m_rotation = rotation; lerped.m_translationScale = translationAndScale; return lerped; } inline Transform Transform::Delta(const Transform& from, const Transform& to) { ASSERT(from.m_rotation.IsNormalized() && to.m_rotation.IsNormalized()); ASSERT(!from.m_translationScale.IsW0() && !to.m_translationScale.IsW0()); Transform result; Vector const inverseScale = from.GetInverseScaleVector(); Vector const deltaScale = to.GetScaleVector() * inverseScale; // If we have negative scaling, we need to use matrices to calculate the deltas Vector const minScale = Vector::Min(from.m_translationScale.GetSplatW(), to.m_translationScale.GetSplatW()); if (minScale.IsAnyLessThan(Vector::Zero)) { // Multiply the transforms using matrices to get the correct rotation and then remove the scale; Matrix const toMtx = to.ToMatrix(); Matrix const fromMtx = from.ToMatrix(); Matrix resultMtx = toMtx * fromMtx.GetInverse(); resultMtx.RemoveScaleFast(); // Apply back the signs from the final scale Vector const sign = deltaScale.GetSign(); resultMtx[0] *= sign.GetSplatX(); resultMtx[1] *= sign.GetSplatY(); resultMtx[2] *= sign.GetSplatZ(); result.m_rotation = resultMtx.GetRotation(); ASSERT(result.m_rotation.IsNormalized()); result.m_translationScale = Vector::Select(resultMtx.GetTranslation(), deltaScale, Vector::Select0001); } else { Quaternion const fromInverseRotation = from.m_rotation.GetInverse(); result.m_rotation = to.m_rotation * fromInverseRotation; Vector const deltaTranslation = to.m_translationScale - 
from.m_translationScale; Vector const translation = fromInverseRotation.RotateVector(deltaTranslation) * inverseScale; result.m_translationScale = Vector::Select(translation, deltaScale, Vector::Select0001); } return result; } inline Transform Transform::DeltaNoScale(const Transform& from, const Transform& to) { Quaternion const inverseFromRotation = from.m_rotation.GetInverse(); Vector const deltaTranslation = to.GetTranslation() - from.GetTranslation(); Transform delta; delta.m_rotation = to.m_rotation * inverseFromRotation; delta.m_translationScale = inverseFromRotation.RotateVector(deltaTranslation).GetWithW1(); return delta; } inline void Transform::DirectlySetRotation(Transform& transform, Quaternion&& rotation) { transform.m_rotation = rotation; } inline void Transform::DirectlySetRotation(Transform& transform, const Quaternion& rotation) { transform.m_rotation = rotation; } inline void Transform::DirectlySetTranslationScale(Transform& transform, Vector&& translationScale) { transform.m_translationScale = translationScale; } inline void Transform::DirectlySetTranslationScale(Transform& transform, const Vector& translationScale) { transform.m_translationScale = translationScale; } inline Transform::Transform(NoInit_t) : m_rotation(NoInit) , m_translationScale(NoInit) { } inline Transform::Transform(const Matrix& m) { Vector mTranslation, mScale; m.Decompose(m_rotation, mTranslation, mScale); ASSERT(Math::IsNearEqual(mScale.GetX(), mScale.GetY()) && Math::IsNearEqual(mScale.GetY(),mScale.GetZ())); m_translationScale = Vector::Select(mTranslation, mScale, Vector::Select0001); } inline Transform::Transform(const Quaternion& rotation, const Vector& translation, float scale) : m_rotation(rotation) , m_translationScale(Vector::Select(translation, Vector(scale), Vector::Select0001)) { } inline Transform::Transform(const AxisAngle& rotation) : m_rotation(rotation) , m_translationScale(Vector::UnitW) { } inline Matrix Transform::ToMatrix() const { return Matrix(m_rotation, m_translationScale.GetWithW1(), m_translationScale.GetSplatW()); } inline Matrix Transform::ToMatrixNoScale() const { return Matrix(m_rotation, m_translationScale.GetWithW1(), Vector::One); } inline EulerAngles Transform::ToEulerAngles() const { return m_rotation.ToEulerAngles(); } inline Vector Transform::GetAxisX() const { return m_rotation.RotateVector(Vector::UnitX); } inline Vector Transform::GetAxisY() const { return m_rotation.RotateVector(Vector::UnitY); } inline Vector Transform::GetAxisZ() const { return m_rotation.RotateVector(Vector::UnitZ); } inline Vector Transform::GetRightVector() const { return m_rotation.RotateVector(Vector::WorldRight); } inline Vector Transform::GetForwardVector() const { return m_rotation.RotateVector(Vector::WorldForward); } inline Vector Transform::GetUpVector() const { return m_rotation.RotateVector(Vector::WorldUp); } inline bool Transform::IsIdentity() const { return m_rotation.IsIdentity() && m_translationScale.IsEqual4(Vector::UnitW); } inline bool Transform::IsRigidTransform() const { return GetScale() == 1.0f; } inline void Transform::MakeRigidTransform() { SetScale(1.0f); } inline Transform& Transform::Inverse() { ASSERT(!m_translationScale.IsW0()); Quaternion const inverseRotation = m_rotation.GetInverse(); m_rotation = inverseRotation; Vector const inverseScale = GetInverseScaleVector(); Vector const inverselyScaledTranslation = inverseScale * m_translationScale.GetWithW0(); Vector const inverselyRotatedTranslation = inverseRotation.RotateVector(inverselyScaledTranslation); 
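    // A round-trip sketch of the inverse/delta contracts (illustrative; rigid
    // transforms, and assuming this library's composition order, in which
    // (a * b) applies 'a' first and then 'b'):
    //
    //     Transform const delta = Transform::DeltaNoScale( from, to );
    //     ASSERT( ( delta * from ).IsNearEqual( to ) );  // concatenating the delta onto 'from' reaches 'to'
    //     ASSERT( ( t.GetInverse() * t ).IsNearEqual( Transform::Identity ) );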
    Vector const inverseTranslation = inverselyRotatedTranslation.GetNegated().SetW0();
    m_translationScale = Vector::Select(inverseTranslation, inverseScale, Vector::Select0001);
    return *this;
}

inline Transform Transform::GetInverse() const
{
    Transform inverse = *this;
    return inverse.Inverse();
}

inline Transform Transform::GetDeltaToOther(const Transform& targetTransform) const
{
    return Transform::Delta(*this, targetTransform);
}

inline Transform Transform::GetDeltaFromOther(const Transform& startTransform) const
{
    return Transform::Delta(startTransform, *this);
}

inline const Quaternion& Transform::GetRotation() const { return m_rotation; }

inline void Transform::SetRotation(const Quaternion& rotation)
{
    ASSERT(rotation.IsNormalized());
    m_rotation = rotation;
}

inline void Transform::AddRotation(const Quaternion& delta)
{
    ASSERT(delta.IsNormalized());
    m_rotation = delta * m_rotation;
}

inline const Vector& Transform::GetTranslation() const { return m_translationScale; }

inline const Vector& Transform::GetTranslationAndScale() const { return m_translationScale; }

inline void Transform::SetTranslation(const Vector& newTranslation)
{
    m_translationScale = Vector::Select(newTranslation, m_translationScale, Vector::Select0001);
}

inline void Transform::SetTranslationAndScale(const Vector& newTranslationScale)
{
    ASSERT(newTranslationScale.GetW() != 0.0f);
    m_translationScale = newTranslationScale;
}

inline void Transform::AddTranslation(const Vector& translationDelta)
{
    m_translationScale += translationDelta.GetWithW0();
}

inline Vector Transform::GetTranslationAsVector() const { return m_translationScale.GetWithW0(); }

inline Vector Transform::GetTranslationAsPoint() const { return m_translationScale.GetWithW1(); }

inline float Transform::GetScale() const { return m_translationScale.GetW(); }

inline Vector Transform::GetScaleVector() const { return m_translationScale.GetSplatW(); }

inline Vector Transform::GetInverseScaleVector() const { return m_translationScale.GetSplatW().GetInverse(); }

inline void Transform::SetScale(float uniformScale) { m_translationScale.SetW(uniformScale); }

inline bool Transform::HasScale() const { return m_translationScale.GetW() != 1.0f; }

inline bool Transform::HasNegativeScale() const { return m_translationScale.GetW() < 0.0f; }

inline Vector Transform::TranslateVector(const Vector& vector) const
{
    return vector + m_translationScale.GetWithW0();
}

inline Vector Transform::ScaleVector(const Vector& vector) const
{
    return vector * GetScaleVector();
}

inline Vector Transform::TransformPoint(const Vector& point) const
{
    ASSERT(!m_translationScale.IsW0());
    Vector transformedPoint = point * m_translationScale.GetSplatW();
    transformedPoint = (m_translationScale + m_rotation.RotateVector(transformedPoint)).GetWithW0();
    return transformedPoint;
}

inline Vector Transform::TransformPointNoScale(const Vector& point) const
{
    Vector transformedPoint = (m_translationScale + m_rotation.RotateVector(point)).GetWithW0();
    return transformedPoint;
}

inline Vector Transform::RotateVector(const Vector& vector) const { return m_rotation.RotateVector(vector); }

inline Vector Transform::TransformNormal(const Vector& vector) const { return RotateVector(vector); }

inline Vector Transform::InverseRotateVector(const Vector& vector) const { return m_rotation.RotateVectorInverse(vector); }

inline Vector Transform::InverseTransformPoint(const Vector& point) const
{
    ASSERT(!m_translationScale.IsW0());
    Vector const shiftedPoint = point - m_translationScale;
    Vector const unrotatedShiftedPoint =
m_rotation.RotateVectorInverse(shiftedPoint); Vector const inverseScale = GetInverseScaleVector(); Vector const result = unrotatedShiftedPoint * inverseScale; return result; } inline Vector Transform::InverseTransformPointNoScale(const Vector& point) const { Vector const shiftedPoint = point - m_translationScale; Vector const unrotatedShiftedPoint = m_rotation.RotateVectorInverse(shiftedPoint); return unrotatedShiftedPoint; } inline Vector Transform::TransformVector(const Vector& vector) const { ASSERT(!m_translationScale.IsW0()); Vector transformedVector = vector * GetScaleVector(); transformedVector = m_rotation.RotateVector(transformedVector); return transformedVector; } inline Vector Transform::TransformVectorNoScale(const Vector& vector) const { return RotateVector(vector); } inline Vector Transform::InverseTransformVector(const Vector& vector) const { ASSERT(!m_translationScale.IsW0()); Vector const unrotatedVector = m_rotation.RotateVectorInverse(vector); Vector const inverseScale = GetInverseScaleVector(); Vector const result = unrotatedVector * inverseScale; return result; } inline Vector Transform::InverseTransformVectorNoScale(const Vector& vector) const { return m_rotation.RotateVectorInverse(vector); } inline Transform Transform::operator*(const Transform& rhs) const { Transform transform = *this; transform *= rhs; return transform; } inline Transform& Transform::operator*=(const Transform& rhs) { Vector const scale = GetScaleVector(); Vector const rhsScale = rhs.GetScaleVector(); Vector const minScale = Vector::Min(scale, rhsScale); Vector const finalScale = scale * rhsScale; if (minScale.IsAnyLessThan(Vector::Zero)) { // Multiply the transforms using matrices to // get the correct rotation and then remove the scale; Matrix const lhsMtx = ToMatrix(); Matrix const rhsMtx = rhs.ToMatrix(); Matrix resultMtx = lhsMtx * rhsMtx; resultMtx.RemoveScaleFast(); // Apply back the signs from the final scale Vector const sign = finalScale.GetSign(); resultMtx[0] *= sign.GetSplatX(); resultMtx[1] *= sign.GetSplatY(); resultMtx[2] *= sign.GetSplatZ(); m_rotation = resultMtx.GetRotation(); ASSERT(m_rotation.IsNormalized()); m_translationScale = Vector::Select(resultMtx.GetTranslation(), finalScale, Vector::Select0001); } else { // Normal case m_rotation = m_rotation * rhs.m_rotation; m_rotation.Normalize(); Vector const translation = rhs.m_rotation.RotateVector(m_translationScale * rhsScale) + rhs.m_translationScale; m_translationScale = Vector::Select(translation, finalScale, Vector::Select0001); } return *this; } inline bool Transform::IsNearEqual(const Transform& rhs, const Radians angleThreshold, float translationScaleThreshold) const { if (!m_rotation.IsNearEqual(rhs.m_rotation, angleThreshold)) { return false; } if (!m_translationScale.IsNearEqual4(rhs.m_translationScale, translationScaleThreshold)) { return false; } return true; } inline bool Transform::operator==(const Transform& rhs) const { if (m_translationScale != rhs.m_translationScale) { return false; } if (m_rotation != rhs.m_rotation) { return false; } return true; } inline bool Transform::operator!=(const Transform& rhs) const { return !operator==(rhs); } } ================================================ FILE: MotionCorrection/src/cpp/Math/Types.cpp ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 * SPDX-License-Identifier: Apache-2.0
 */

#include "Types.h"

Int2 const Int2::Zero = Int2( 0, 0 );

Int3 const Int3::Zero = Int3( 0, 0, 0 );

Int4 const Int4::Zero = Int4( 0, 0, 0, 0 );
Int4 const Int4::MinusOne = Int4( -1, -1, -1, -1 );

Float2 const Float2::Zero = Float2( 0, 0 );
Float2 const Float2::One = Float2( 1, 1 );
Float2 const Float2::UnitX = Float2( 1, 0 );
Float2 const Float2::UnitY = Float2( 0, 1 );

Float3 const Float3::Zero = Float3( 0, 0, 0 );
Float3 const Float3::One = Float3( 1, 1, 1 );
Float3 const Float3::UnitX = Float3( 1, 0, 0 );
Float3 const Float3::UnitY = Float3( 0, 1, 0 );
Float3 const Float3::UnitZ = Float3( 0, 0, 1 );
Float3 const Float3::WorldForward = Float3( 0, -1, 0 );
Float3 const Float3::WorldUp = Float3( 0, 0, 1 );
Float3 const Float3::WorldRight = Float3( -1, 0, 0 );

Float4 const Float4::Zero = Float4( 0, 0, 0, 0 );
Float4 const Float4::One = Float4( 1, 1, 1, 1 );
Float4 const Float4::UnitX = Float4( 1, 0, 0, 0 );
Float4 const Float4::UnitY = Float4( 0, 1, 0, 0 );
Float4 const Float4::UnitZ = Float4( 0, 0, 1, 0 );
Float4 const Float4::UnitW = Float4( 0, 0, 0, 1 );
Float4 const Float4::WorldForward = Float4( 0, -1, 0, 0 );
Float4 const Float4::WorldUp = Float4( 0, 0, 1, 0 );
Float4 const Float4::WorldRight = Float4( -1, 0, 0, 0 );

Radians const Radians::Pi = Radians( Math::Pi );
Radians const Radians::TwoPi = Radians( Math::TwoPi );
Radians const Radians::OneDivPi = Radians( Math::OneDivPi );
Radians const Radians::OneDivTwoPi = Radians( Math::OneDivTwoPi );
Radians const Radians::PiDivTwo = Radians( Math::PiDivTwo );
Radians const Radians::PiDivFour = Radians( Math::PiDivFour );

================================================
FILE: MotionCorrection/src/cpp/Math/Types.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Scalar.h" enum NoInit_t { NoInit }; enum ZeroInit_t { ZeroInit }; enum IdentityInit_t { IdentityInit }; enum class Axis : uint8_t { X = 0, Y, Z, NegX, NegY, NegZ }; struct Float2; struct Float3; struct Float4; struct Int2 { static Int2 const Zero; public: inline Int2() {} inline Int2( ZeroInit_t ) : m_x( 0 ), m_y( 0 ) {} inline Int2( Float2 const& v ); inline explicit Int2( int32_t v ) : m_x( v ), m_y( v ) {} inline explicit Int2( int32_t ix, int32_t iy ) : m_x( ix ), m_y( iy ) {} inline bool IsZero() const { return *this == Zero; } inline int32_t& operator[]( uint32_t i ) { return ( (int32_t*) this )[i]; } inline int32_t const& operator[]( uint32_t i ) const { return ( (int32_t*) this )[i]; } inline bool operator==( Int2 const rhs ) const { return m_x == rhs.m_x && m_y == rhs.m_y; } inline bool operator!=( Int2 const rhs ) const { return m_x != rhs.m_x || m_y != rhs.m_y; } inline Int2 operator+( Int2 const& rhs ) const { return Int2( m_x + rhs.m_x, m_y + rhs.m_y ); } inline Int2 operator-( Int2 const& rhs ) const { return Int2( m_x - rhs.m_x, m_y - rhs.m_y ); } inline Int2 operator*( Int2 const& rhs ) const { return Int2( m_x * rhs.m_x, m_y * rhs.m_y ); } inline Int2 operator/( Int2 const& rhs ) const { return Int2( m_x / rhs.m_x, m_y / rhs.m_y ); } inline Int2& operator+=( int32_t const& rhs ) { m_x += rhs; m_y += rhs; return *this; } inline Int2& operator-=( int32_t const& rhs ) { m_x -= rhs; m_y -= rhs; return *this; } inline Int2& operator*=( int32_t const& rhs ) { m_x *= rhs; m_y *= rhs; return *this; } inline Int2& operator/=( int32_t const& rhs ) { m_x /= rhs; m_y /= rhs; return *this; } // Component wise operation inline Int2 operator+( int32_t const& rhs ) const { return Int2( m_x + rhs, m_y + rhs ); } inline Int2 operator-( int32_t const& rhs ) const { return Int2( m_x - rhs, m_y - rhs ); } inline Int2 operator*( int32_t const& rhs ) const { return Int2( m_x * rhs, m_y * rhs ); } inline Int2 operator/( int32_t const& rhs ) const { return Int2( m_x / rhs, m_y / rhs ); } inline Int2& operator+=( Int2 const& rhs ) { m_x += rhs.m_x; m_y += rhs.m_y; return *this; } inline Int2& operator-=( Int2 const& rhs ) { m_x -= rhs.m_x; m_y -= rhs.m_y; return *this; } inline Int2& operator*=( Int2 const& rhs ) { m_x *= rhs.m_x; m_y *= rhs.m_y; return *this; } inline Int2& operator/=( Int2 const& rhs ) { m_x /= rhs.m_x; m_y /= rhs.m_y; return *this; } public: int32_t m_x, m_y; }; struct Int3 { static Int3 const Zero; public: inline Int3() {} inline Int3( ZeroInit_t ) : m_x( 0 ), m_y( 0 ), m_z( 0 ) {} inline Int3( Float3 const& v ); inline explicit Int3( int32_t v ) : m_x( v ), m_y( v ), m_z( v ) {} inline explicit Int3( int32_t ix, int32_t iy, int32_t iz ) : m_x( ix ), m_y( iy ), m_z( iz ) {} inline bool IsZero() const { return *this == Zero; } inline int32_t& operator[]( uint32_t i ) { return ( (int32_t*) this )[i]; } inline int32_t const& operator[]( uint32_t i ) const { return ( (int32_t*) this )[i]; } inline bool operator==( Int3 const rhs ) const { return m_x == rhs.m_x && m_y == rhs.m_y && m_z == rhs.m_z; } inline bool operator!=( Int3 const rhs ) const { return m_x != rhs.m_x || m_y != rhs.m_y || m_z != rhs.m_z; } inline Int3 operator+( Int3 const& rhs ) const { return Int3( m_x + rhs.m_x, m_y + rhs.m_y, m_z + rhs.m_z ); } inline Int3 operator-( Int3 const& rhs ) const { return Int3( m_x - rhs.m_x, m_y - rhs.m_y, m_z - rhs.m_z ); } inline Int3 operator*( Int3 const& rhs ) const { return Int3( m_x * rhs.m_x, m_y * rhs.m_y, m_z 
* rhs.m_z ); } inline Int3 operator/( Int3 const& rhs ) const { return Int3( m_x / rhs.m_x, m_y / rhs.m_y, m_z / rhs.m_z ); } inline Int3& operator+=( int32_t const& rhs ) { m_x += rhs; m_y += rhs; m_z += rhs; return *this; } inline Int3& operator-=( int32_t const& rhs ) { m_x -= rhs; m_y -= rhs; m_z -= rhs; return *this; } inline Int3& operator*=( int32_t const& rhs ) { m_x *= rhs; m_y *= rhs; m_z *= rhs; return *this; } inline Int3& operator/=( int32_t const& rhs ) { m_x /= rhs; m_y /= rhs; m_z /= rhs; return *this; } // Component wise operation inline Int3 operator+( int32_t const& rhs ) const { return Int3( m_x + rhs, m_y + rhs, m_z + rhs ); } inline Int3 operator-( int32_t const& rhs ) const { return Int3( m_x - rhs, m_y - rhs, m_z - rhs ); } inline Int3 operator*( int32_t const& rhs ) const { return Int3( m_x * rhs, m_y * rhs, m_z * rhs ); } inline Int3 operator/( int32_t const& rhs ) const { return Int3( m_x / rhs, m_y / rhs, m_z / rhs ); } inline Int3& operator+=( Int3 const& rhs ) { m_x += rhs.m_x; m_y += rhs.m_y; m_z += rhs.m_z; return *this; } inline Int3& operator-=( Int3 const& rhs ) { m_x -= rhs.m_x; m_y -= rhs.m_y; m_z -= rhs.m_z; return *this; } inline Int3& operator*=( Int3 const& rhs ) { m_x *= rhs.m_x; m_y *= rhs.m_y; m_z *= rhs.m_z; return *this; } inline Int3& operator/=( Int3 const& rhs ) { m_x /= rhs.m_x; m_y /= rhs.m_y; m_z /= rhs.m_z; return *this; } public: int32_t m_x, m_y, m_z; }; struct Int4 { static Int4 const Zero; static Int4 const MinusOne; public: inline Int4() {} inline Int4( ZeroInit_t ) : m_x( 0 ), m_y( 0 ), m_z( 0 ), m_w( 0 ) {} inline explicit Int4( int32_t v ) : m_x( v ), m_y( v ), m_z( v ), m_w( v ) {} inline explicit Int4( int32_t ix, int32_t iy, int32_t iz, int32_t iw ) : m_x( ix ), m_y( iy ), m_z( iz ), m_w( iw ) {} inline bool IsZero() const { return *this == Zero; } inline int32_t& operator[]( uint32_t i ) { return ( (int32_t*) this )[i]; } inline int32_t const& operator[]( uint32_t i ) const { return ( (int32_t*) this )[i]; } inline bool operator==( Int4 const rhs ) const { return m_x == rhs.m_x && m_y == rhs.m_y && m_z == rhs.m_z && m_w == rhs.m_w; } inline bool operator!=( Int4 const rhs ) const { return m_x != rhs.m_x || m_y != rhs.m_y || m_z != rhs.m_z || m_w != rhs.m_w; } inline Int4 operator+( int32_t const& rhs ) const { return Int4( m_x + rhs, m_y + rhs, m_z + rhs, m_w + rhs ); } inline Int4 operator-( int32_t const& rhs ) const { return Int4( m_x - rhs, m_y - rhs, m_z - rhs, m_w - rhs ); } inline Int4 operator*( int32_t const& rhs ) const { return Int4( m_x * rhs, m_y * rhs, m_z * rhs, m_w * rhs ); } inline Int4 operator/( int32_t const& rhs ) const { return Int4( m_x / rhs, m_y / rhs, m_z / rhs, m_w / rhs ); } inline Int4& operator+=( int32_t const& rhs ) { m_x += rhs; m_y += rhs; m_z += rhs; m_w += rhs; return *this; } inline Int4& operator-=( int32_t const& rhs ) { m_x -= rhs; m_y -= rhs; m_z -= rhs; m_w -= rhs; return *this; } inline Int4& operator*=( int32_t const& rhs ) { m_x *= rhs; m_y *= rhs; m_z *= rhs; m_w *= rhs; return *this; } inline Int4& operator/=( int32_t const& rhs ) { m_x /= rhs; m_y /= rhs; m_z /= rhs; m_w /= rhs; return *this; } // Component wise operation inline Int4 operator+( Int4 const& rhs ) const { return Int4( m_x + rhs.m_x, m_y + rhs.m_y, m_z + rhs.m_z, m_w + rhs.m_w ); } inline Int4 operator-( Int4 const& rhs ) const { return Int4( m_x - rhs.m_x, m_y - rhs.m_y, m_z - rhs.m_z, m_w - rhs.m_w ); } inline Int4 operator*( Int4 const& rhs ) const { return Int4( m_x * rhs.m_x, m_y * rhs.m_y, m_z * rhs.m_z, m_w 
* rhs.m_w ); } inline Int4 operator/( Int4 const& rhs ) const { return Int4( m_x / rhs.m_x, m_y / rhs.m_y, m_z / rhs.m_z, m_w / rhs.m_w ); } inline Int4& operator+=( Int4 const& rhs ) { m_x += rhs.m_x; m_y += rhs.m_y; m_z += rhs.m_z; m_w += rhs.m_w; return *this; } inline Int4& operator-=( Int4 const& rhs ) { m_x -= rhs.m_x; m_y -= rhs.m_y; m_z -= rhs.m_z; m_w -= rhs.m_w; return *this; } inline Int4& operator*=( Int4 const& rhs ) { m_x *= rhs.m_x; m_y *= rhs.m_y; m_z *= rhs.m_z; m_w *= rhs.m_w; return *this; } inline Int4& operator/=( Int4 const& rhs ) { m_x /= rhs.m_x; m_y /= rhs.m_y; m_z /= rhs.m_z; m_w /= rhs.m_w; return *this; } public: int32_t m_x, m_y, m_z, m_w; }; struct Float2 { static Float2 const Zero; static Float2 const One; static Float2 const UnitX; static Float2 const UnitY; public: inline Float2() {} FORCE_INLINE Float2( ZeroInit_t ) : m_x( 0 ), m_y( 0 ) {} FORCE_INLINE explicit Float2( float v ) : m_x( v ), m_y( v ) {} FORCE_INLINE explicit Float2( float ix, float iy ) : m_x( ix ), m_y( iy ) {} FORCE_INLINE explicit Float2( int32_t ix, int32_t iy ) : m_x( (float) ix ), m_y( (float) iy ) {} inline explicit Float2( Int2 const& v ) : m_x( (float) v.m_x ), m_y( (float) v.m_y ) {} inline explicit Float2( Float3 const& v ); inline explicit Float2( Float4 const& v ); inline bool IsZero() const { return *this == Zero; } inline float& operator[]( uint32_t i ) { return ( (float*) this )[i]; } inline float const& operator[]( uint32_t i ) const { return ( (float*) this )[i]; } FORCE_INLINE Float2 operator-() const { return Float2( -m_x, -m_y ); } inline bool operator==( Float2 const rhs ) const { return m_x == rhs.m_x && m_y == rhs.m_y; } inline bool operator!=( Float2 const rhs ) const { return m_x != rhs.m_x || m_y != rhs.m_y; } inline Float2 operator+( Float2 const& rhs ) const { return Float2( m_x + rhs.m_x, m_y + rhs.m_y ); } inline Float2 operator-( Float2 const& rhs ) const { return Float2( m_x - rhs.m_x, m_y - rhs.m_y ); } inline Float2 operator*( Float2 const& rhs ) const { return Float2( m_x * rhs.m_x, m_y * rhs.m_y ); } inline Float2 operator/( Float2 const& rhs ) const { return Float2( m_x / rhs.m_x, m_y / rhs.m_y ); } inline Float2 operator+( float const& rhs ) const { return Float2( m_x + rhs, m_y + rhs ); } inline Float2 operator-( float const& rhs ) const { return Float2( m_x - rhs, m_y - rhs ); } inline Float2 operator*( float const& rhs ) const { return Float2( m_x * rhs, m_y * rhs ); } inline Float2 operator/( float const& rhs ) const { return Float2( m_x / rhs, m_y / rhs ); } inline Float2& operator+=( Float2 const& rhs ) { m_x += rhs.m_x; m_y += rhs.m_y; return *this; } inline Float2& operator-=( Float2 const& rhs ) { m_x -= rhs.m_x; m_y -= rhs.m_y; return *this; } inline Float2& operator*=( Float2 const& rhs ) { m_x *= rhs.m_x; m_y *= rhs.m_y; return *this; } inline Float2& operator/=( Float2 const& rhs ) { m_x /= rhs.m_x; m_y /= rhs.m_y; return *this; } inline Float2& operator+=( float const& rhs ) { m_x += rhs; m_y += rhs; return *this; } inline Float2& operator-=( float const& rhs ) { m_x -= rhs; m_y -= rhs; return *this; } inline Float2& operator*=( float const& rhs ) { m_x *= rhs; m_y *= rhs; return *this; } inline Float2& operator/=( float const& rhs ) { m_x /= rhs; m_y /= rhs; return *this; } float m_x, m_y; }; struct Float3 { static Float3 const Zero; static Float3 const One; static Float3 const UnitX; static Float3 const UnitY; static Float3 const UnitZ; static Float3 const WorldForward; static Float3 const WorldUp; static Float3 const WorldRight; 
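    // The world-axis constants declared above (values in Types.cpp) encode this
    // library's Z-up convention: WorldUp = +Z, WorldForward = -Y, WorldRight = -X.
    // An illustrative sanity check that the triple forms a right-handed frame:
    //
    //     cross( WorldRight, WorldForward )
    //         = ( -1, 0, 0 ) x ( 0, -1, 0 )
    //         = ( 0*0 - 0*(-1), 0*0 - (-1)*0, (-1)*(-1) - 0*0 )
    //         = ( 0, 0, 1 ) == WorldUp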
public: inline Float3() {} FORCE_INLINE Float3( ZeroInit_t ) : m_x( 0 ), m_y( 0 ), m_z( 0 ) {} FORCE_INLINE explicit Float3( float v ) : m_x( v ), m_y( v ), m_z( v ) {} FORCE_INLINE explicit Float3( float ix, float iy, float iz ) : m_x( ix ), m_y( iy ), m_z( iz ) {} inline explicit Float3( Float2 const& v, float iz = 0.0f ) : m_x( v.m_x ), m_y( v.m_y ), m_z( iz ) {} inline explicit Float3( Float4 const& v ); inline bool IsZero() const { return *this == Zero; } inline float& operator[]( uint32_t i ) { return ( (float*) this )[i]; } inline float const& operator[]( uint32_t i ) const { return ( (float*) this )[i]; } FORCE_INLINE Float3 operator-() const { return Float3( -m_x, -m_y, -m_z ); } inline bool operator==( Float3 const rhs ) const { return m_x == rhs.m_x && m_y == rhs.m_y && m_z == rhs.m_z; } inline bool operator!=( Float3 const rhs ) const { return m_x != rhs.m_x || m_y != rhs.m_y || m_z != rhs.m_z; } inline operator Float2() const { return Float2( m_x, m_y ); } inline Float3 operator+( Float3 const& rhs ) const { return Float3( m_x + rhs.m_x, m_y + rhs.m_y, m_z + rhs.m_z ); } inline Float3 operator-( Float3 const& rhs ) const { return Float3( m_x - rhs.m_x, m_y - rhs.m_y, m_z - rhs.m_z ); } inline Float3 operator*( Float3 const& rhs ) const { return Float3( m_x * rhs.m_x, m_y * rhs.m_y, m_z * rhs.m_z ); } inline Float3 operator/( Float3 const& rhs ) const { return Float3( m_x / rhs.m_x, m_y / rhs.m_y, m_z / rhs.m_z ); } inline Float3 operator+( float const& rhs ) const { return Float3( m_x + rhs, m_y + rhs, m_z + rhs ); } inline Float3 operator-( float const& rhs ) const { return Float3( m_x - rhs, m_y - rhs, m_z - rhs ); } inline Float3 operator*( float const& rhs ) const { return Float3( m_x * rhs, m_y * rhs, m_z * rhs ); } inline Float3 operator/( float const& rhs ) const { return Float3( m_x / rhs, m_y / rhs, m_z / rhs ); } inline Float3& operator+=( Float3 const& rhs ) { m_x += rhs.m_x; m_y += rhs.m_y; m_z += rhs.m_z; return *this; } inline Float3& operator-=( Float3 const& rhs ) { m_x -= rhs.m_x; m_y -= rhs.m_y; m_z -= rhs.m_z; return *this; } inline Float3& operator*=( Float3 const& rhs ) { m_x *= rhs.m_x; m_y *= rhs.m_y; m_z *= rhs.m_z; return *this; } inline Float3& operator/=( Float3 const& rhs ) { m_x /= rhs.m_x; m_y /= rhs.m_y; m_z /= rhs.m_z; return *this; } inline Float3& operator+=( float const& rhs ) { m_x += rhs; m_y += rhs; m_z += rhs; return *this; } inline Float3& operator-=( float const& rhs ) { m_x -= rhs; m_y -= rhs; m_z -= rhs; return *this; } inline Float3& operator*=( float const& rhs ) { m_x *= rhs; m_y *= rhs; m_z *= rhs; return *this; } inline Float3& operator/=( float const& rhs ) { m_x /= rhs; m_y /= rhs; m_z /= rhs; return *this; } float m_x, m_y, m_z; }; struct Float4 { static Float4 const Zero; static Float4 const One; static Float4 const UnitX; static Float4 const UnitY; static Float4 const UnitZ; static Float4 const UnitW; static Float4 const WorldForward; static Float4 const WorldUp; static Float4 const WorldRight; public: Float4() {} FORCE_INLINE Float4( ZeroInit_t ) : m_x( 0 ), m_y( 0 ), m_z( 0 ), m_w( 0 ) {} FORCE_INLINE explicit Float4( float v ) : m_x( v ), m_y( v ), m_z( v ), m_w( v ) {} FORCE_INLINE explicit Float4( float ix, float iy, float iz, float iw ) : m_x( ix ), m_y( iy ), m_z( iz ), m_w( iw ) {} explicit Float4( Float2 const& v, float iz = 0.0f, float iw = 0.0f ) : m_x( v.m_x ), m_y( v.m_y ), m_z( iz ), m_w( iw ) {} explicit Float4( Float3 const& v, float iw = 0.0f ) : m_x( v.m_x ), m_y( v.m_y ), m_z( v.m_z ), m_w( iw ) {} inline 
bool IsZero() const { return *this == Zero; } float& operator[]( uint32_t i ) { return ( (float*) this )[i]; } float const& operator[]( uint32_t i ) const { return ( (float*) this )[i]; } FORCE_INLINE Float4 operator-() const { return Float4( -m_x, -m_y, -m_z, -m_w ); } bool operator==( Float4 const rhs ) const { return m_x == rhs.m_x && m_y == rhs.m_y && m_z == rhs.m_z && m_w == rhs.m_w; } bool operator!=( Float4 const rhs ) const { return m_x != rhs.m_x || m_y != rhs.m_y || m_z != rhs.m_z || m_w != rhs.m_w; } inline operator Float2() const { return Float2( m_x, m_y ); } inline operator Float3() const { return Float3( m_x, m_y, m_z ); } inline Float4 operator+( Float4 const& rhs ) const { return Float4( m_x + rhs.m_x, m_y + rhs.m_y, m_z + rhs.m_z, m_w + rhs.m_w ); } inline Float4 operator-( Float4 const& rhs ) const { return Float4( m_x - rhs.m_x, m_y - rhs.m_y, m_z - rhs.m_z, m_w - rhs.m_w ); } inline Float4 operator*( Float4 const& rhs ) const { return Float4( m_x * rhs.m_x, m_y * rhs.m_y, m_z * rhs.m_z, m_w * rhs.m_w ); } inline Float4 operator/( Float4 const& rhs ) const { return Float4( m_x / rhs.m_x, m_y / rhs.m_y, m_z / rhs.m_z, m_w / rhs.m_w ); } inline Float4 operator+( float const& rhs ) const { return Float4( m_x + rhs, m_y + rhs, m_z + rhs, m_w + rhs ); } inline Float4 operator-( float const& rhs ) const { return Float4( m_x - rhs, m_y - rhs, m_z - rhs, m_w - rhs ); } inline Float4 operator*( float const& rhs ) const { return Float4( m_x * rhs, m_y * rhs, m_z * rhs, m_w * rhs ); } inline Float4 operator/( float const& rhs ) const { return Float4( m_x / rhs, m_y / rhs, m_z / rhs, m_w / rhs ); } inline Float4& operator+=( Float4 const& rhs ) { m_x += rhs.m_x; m_y += rhs.m_y; m_z += rhs.m_z; m_w += rhs.m_w; return *this; } inline Float4& operator-=( Float4 const& rhs ) { m_x -= rhs.m_x; m_y -= rhs.m_y; m_z -= rhs.m_z; m_w -= rhs.m_w; return *this; } inline Float4& operator*=( Float4 const& rhs ) { m_x *= rhs.m_x; m_y *= rhs.m_y; m_z *= rhs.m_z; m_w *= rhs.m_w; return *this; } inline Float4& operator/=( Float4 const& rhs ) { m_x /= rhs.m_x; m_y /= rhs.m_y; m_z /= rhs.m_z; m_w /= rhs.m_w; return *this; } inline Float4& operator+=( float const& rhs ) { m_x += rhs; m_y += rhs; m_z += rhs; m_w += rhs; return *this; } inline Float4& operator-=( float const& rhs ) { m_x -= rhs; m_y -= rhs; m_z -= rhs; m_w -= rhs; return *this; } inline Float4& operator*=( float const& rhs ) { m_x *= rhs; m_y *= rhs; m_z *= rhs; m_w *= rhs; return *this; } inline Float4& operator/=( float const& rhs ) { m_x /= rhs; m_y /= rhs; m_z /= rhs; m_w /= rhs; return *this; } float m_x, m_y, m_z, m_w; }; inline Int2::Int2( Float2 const& v ) : m_x( (int32_t) v.m_x ) , m_y( (int32_t) v.m_y ) { } inline Int3::Int3( Float3 const& v ) : m_x( (int32_t) v.m_x ) , m_y( (int32_t) v.m_y ) , m_z( (int32_t) v.m_z ) { } inline Float2::Float2( Float3 const& v ) : m_x( v.m_x ) , m_y( v.m_y ) { } inline Float2::Float2( Float4 const& v ) : m_x( v.m_x ) , m_y( v.m_y ) { } inline Float3::Float3( Float4 const& v ) : m_x( v.m_x ) , m_y( v.m_y ) , m_z( v.m_z ) { } struct Radians; struct Degrees; struct Degrees { public: inline Degrees() = default; inline Degrees( float degrees ) : m_value( degrees ) {} inline explicit Degrees( Radians const& radians ); FORCE_INLINE explicit operator float() const { return m_value; } FORCE_INLINE operator Radians() const; FORCE_INLINE float ToFloat() const { return m_value; } FORCE_INLINE Radians ToRadians() const; inline Degrees operator-() const { return Degrees( -m_value ); } inline Degrees 
operator+( Degrees const& rhs ) const { return Degrees( m_value + rhs.m_value ); } inline Degrees operator-( Degrees const& rhs ) const { return Degrees( m_value - rhs.m_value ); } inline Degrees operator*( Degrees const& rhs ) const { return Degrees( m_value * rhs.m_value ); } inline Degrees operator/( Degrees const& rhs ) const { return Degrees( m_value / rhs.m_value ); } inline Degrees& operator+=( Degrees const& rhs ) { m_value += rhs.m_value; return *this; } inline Degrees& operator-=( Degrees const& rhs ) { m_value -= rhs.m_value; return *this; } inline Degrees& operator*=( Degrees const& rhs ) { m_value *= rhs.m_value; return *this; } inline Degrees& operator/=( Degrees const& rhs ) { m_value /= rhs.m_value; return *this; } inline Degrees operator+( float const& rhs ) const { return Degrees( m_value + rhs ); } inline Degrees operator-( float const& rhs ) const { return Degrees( m_value - rhs ); } inline Degrees operator*( float const& rhs ) const { return Degrees( m_value * rhs ); } inline Degrees operator/( float const& rhs ) const { return Degrees( m_value / rhs ); } inline Degrees& operator+=( float const& rhs ) { m_value += rhs; return *this; } inline Degrees& operator-=( float const& rhs ) { m_value -= rhs; return *this; } inline Degrees& operator*=( float const& rhs ) { m_value *= rhs; return *this; } inline Degrees& operator/=( float const& rhs ) { m_value /= rhs; return *this; } inline Degrees operator+( int32_t const& rhs ) const { return Degrees( m_value + rhs ); } inline Degrees operator-( int32_t const& rhs ) const { return Degrees( m_value - rhs ); } inline Degrees operator*( int32_t const& rhs ) const { return Degrees( m_value * rhs ); } inline Degrees operator/( int32_t const& rhs ) const { return Degrees( m_value / rhs ); } inline Degrees& operator+=( int32_t const& rhs ) { m_value += rhs; return *this; } inline Degrees& operator-=( int32_t const& rhs ) { m_value -= rhs; return *this; } inline Degrees& operator*=( int32_t const& rhs ) { m_value *= rhs; return *this; } inline Degrees& operator/=( int32_t const& rhs ) { m_value /= rhs; return *this; } inline Degrees operator+( uint32_t const& rhs ) const { return Degrees( m_value + rhs ); } inline Degrees operator-( uint32_t const& rhs ) const { return Degrees( m_value - rhs ); } inline Degrees operator*( uint32_t const& rhs ) const { return Degrees( m_value * rhs ); } inline Degrees operator/( uint32_t const& rhs ) const { return Degrees( m_value / rhs ); } inline Degrees& operator+=( uint32_t const& rhs ) { m_value += rhs; return *this; } inline Degrees& operator-=( uint32_t const& rhs ) { m_value -= rhs; return *this; } inline Degrees& operator*=( uint32_t const& rhs ) { m_value *= rhs; return *this; } inline Degrees& operator/=( uint32_t const& rhs ) { m_value /= rhs; return *this; } inline bool operator>( float const& rhs ) const { return m_value > rhs; }; inline bool operator<( float const& rhs ) const { return m_value < rhs; } inline bool operator>=( float const& rhs ) const { return m_value >= rhs; } inline bool operator<=( float const& rhs ) const { return m_value <= rhs; } inline bool operator>( Degrees const& rhs ) const { return m_value > rhs.m_value; } inline bool operator<( Degrees const& rhs ) const { return m_value < rhs.m_value; } inline bool operator>=( Degrees const& rhs ) const { return m_value >= rhs.m_value; } inline bool operator<=( Degrees const& rhs ) const { return m_value <= rhs.m_value; } inline bool operator>( Radians const& rhs ) const; inline bool operator<( Radians const& rhs ) const; 
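    // Worked examples for the wrapping helpers defined below (illustrative values):
    //
    //     Degrees d( 725.0f );
    //     d.Clamp360();                    // 725 - 2 * 360           -> 5 degrees
    //     Degrees e( 190.0f );
    //     e.Clamp180();                    // wrapped past +/-180     -> -170 degrees
    //     Radians r( Math::Pi * 0.75f );   // 135 degrees
    //     r.Flip();                        // front/rear arc flipped  -> -Pi/4 (-45 degrees)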
inline bool operator>=( Radians const& rhs ) const; inline bool operator<=( Radians const& rhs ) const; inline bool operator==( float const& v ) const { return Math::IsNearEqual( m_value, v ); } inline bool operator!=( float const& v ) const { return !Math::IsNearEqual( m_value, v ); } inline bool operator==( Degrees const& rhs ) const { return m_value == rhs.m_value; } inline bool operator!=( Degrees const& rhs ) const { return m_value != rhs.m_value; } inline bool operator==( Radians const& rhs ) const; inline bool operator!=( Radians const& rhs ) const; inline void Clamp( Degrees min, Degrees max ) { m_value = Math::Clamp( m_value, min.m_value, max.m_value ); } // Clamps between -360 and 360 inline void Clamp360() { m_value -= ( int32_t( m_value / 360.0f ) * 360.0f ); } // Clamps between -360 and 360 inline Degrees GetClamped360() const { Degrees d( m_value ); d.Clamp360(); return d; } // Clamps to -180 to 180 inline void Clamp180() { Clamp360(); float delta = 180 - Math::Abs( m_value ); if ( delta < 0 ) { delta += 180; m_value = ( m_value < 0 ) ? delta : -delta; } } // Clamps to -180 to 180 inline Degrees GetClamped180() const { Degrees r( m_value ); r.Clamp180(); return r; } // Clamps between 0 to 360 inline Degrees& ClampPositive360() { Clamp360(); if ( m_value < 0 ) { m_value += 360; } return *this; } // Clamps between 0 to 360 inline Degrees GetClampedPositive360() const { Degrees d( m_value ); d.ClampPositive360(); return d; } private: float m_value = 0; }; struct Radians { static Radians const Pi; static Radians const TwoPi; static Radians const OneDivPi; static Radians const OneDivTwoPi; static Radians const PiDivTwo; static Radians const PiDivFour; public: inline Radians() = default; inline Radians( float radians ) : m_value( radians ) {} inline explicit Radians( Degrees const& degrees ); FORCE_INLINE explicit operator float() const { return m_value; } FORCE_INLINE operator Degrees() const { return ToDegrees(); } FORCE_INLINE float ToFloat() const { return m_value; } FORCE_INLINE Degrees ToDegrees() const { return Degrees( m_value * Math::RadiansToDegrees ); } inline Radians operator-() const { return Radians( -m_value ); } inline Radians operator+( Radians const& rhs ) const { return Radians( m_value + rhs.m_value ); } inline Radians operator-( Radians const& rhs ) const { return Radians( m_value - rhs.m_value ); } inline Radians operator*( Radians const& rhs ) const { return Radians( m_value * rhs.m_value ); } inline Radians operator/( Radians const& rhs ) const { return Radians( m_value / rhs.m_value ); } inline Radians& operator+=( Radians const& rhs ) { m_value += rhs.m_value; return *this; } inline Radians& operator-=( Radians const& rhs ) { m_value -= rhs.m_value; return *this; } inline Radians& operator*=( Radians const& rhs ) { m_value *= rhs.m_value; return *this; } inline Radians& operator/=( Radians const& rhs ) { m_value /= rhs.m_value; return *this; } inline Radians operator+( float const& rhs ) const { return Radians( m_value + rhs ); } inline Radians operator-( float const& rhs ) const { return Radians( m_value - rhs ); } inline Radians operator*( float const& rhs ) const { return Radians( m_value * rhs ); } inline Radians operator/( float const& rhs ) const { return Radians( m_value / rhs ); } inline Radians& operator+=( float const& rhs ) { m_value += rhs; return *this; } inline Radians& operator-=( float const& rhs ) { m_value -= rhs; return *this; } inline Radians& operator*=( float const& rhs ) { m_value *= rhs; return *this; } inline Radians& operator/=( 
float const& rhs ) { m_value /= rhs; return *this; } inline Radians operator+( int32_t const& rhs ) const { return Radians( m_value + rhs ); } inline Radians operator-( int32_t const& rhs ) const { return Radians( m_value - rhs ); } inline Radians operator*( int32_t const& rhs ) const { return Radians( m_value * rhs ); } inline Radians operator/( int32_t const& rhs ) const { return Radians( m_value / rhs ); } inline Radians& operator+=( int32_t const& rhs ) { m_value += rhs; return *this; } inline Radians& operator-=( int32_t const& rhs ) { m_value -= rhs; return *this; } inline Radians& operator*=( int32_t const& rhs ) { m_value *= rhs; return *this; } inline Radians& operator/=( int32_t const& rhs ) { m_value /= rhs; return *this; } inline Radians operator+( uint32_t const& rhs ) const { return Radians( m_value + rhs ); } inline Radians operator-( uint32_t const& rhs ) const { return Radians( m_value - rhs ); } inline Radians operator*( uint32_t const& rhs ) const { return Radians( m_value * rhs ); } inline Radians operator/( uint32_t const& rhs ) const { return Radians( m_value / rhs ); } inline Radians& operator+=( uint32_t const& rhs ) { m_value += rhs; return *this; } inline Radians& operator-=( uint32_t const& rhs ) { m_value -= rhs; return *this; } inline Radians& operator*=( uint32_t const& rhs ) { m_value *= rhs; return *this; } inline Radians& operator/=( uint32_t const& rhs ) { m_value /= rhs; return *this; } inline bool operator>( float const& rhs ) const { return m_value > rhs; }; inline bool operator<( float const& rhs ) const { return m_value < rhs; } inline bool operator>=( float const& rhs ) const { return m_value >= rhs; } inline bool operator<=( float const& rhs ) const { return m_value <= rhs; } inline bool operator>( Radians const& rhs ) const { return m_value > rhs.m_value; } inline bool operator<( Radians const& rhs ) const { return m_value < rhs.m_value; } inline bool operator>=( Radians const& rhs ) const { return m_value >= rhs.m_value; } inline bool operator<=( Radians const& rhs ) const { return m_value <= rhs.m_value; } inline bool operator>( Degrees const& rhs ) const; inline bool operator<( Degrees const& rhs ) const; inline bool operator>=( Degrees const& rhs ) const; inline bool operator<=( Degrees const& rhs ) const; inline bool operator==( float const& v ) const { return Math::IsNearEqual( m_value, v ); } inline bool operator!=( float const& v ) const { return !Math::IsNearEqual( m_value, v ); } inline bool operator==( Radians const& rhs ) const { return m_value == rhs.m_value; } inline bool operator!=( Radians const& rhs ) const { return m_value != rhs.m_value; } inline bool operator==( Degrees const& rhs ) const; inline bool operator!=( Degrees const& rhs ) const; inline void Clamp( Radians min, Radians max ) { m_value = Math::Clamp( m_value, min.m_value, max.m_value ); } // Clamps between -2Pi to 2Pi inline void Clamp360() { m_value -= int32_t( m_value / Math::TwoPi ) * Math::TwoPi; } // Clamps between -2Pi to 2Pi inline Radians GetClamped360() const { Radians r( m_value ); r.Clamp360(); return r; } // Clamps between 0 to 2Pi inline void ClampPositive360() { Clamp360(); if( m_value < 0 ) { m_value += Math::TwoPi; } } // Clamps between 0 to 2Pi inline Radians GetClampedToPositive360() const { Radians r( m_value ); r.ClampPositive360(); return r; } // Clamps to -Pi to Pi inline void Clamp180() { Clamp360(); float delta = Math::Pi - Math::Abs( m_value ); if ( delta < 0 ) { delta += Math::Pi; m_value = ( m_value < 0 ) ? 
delta : -delta; } } // Clamps to -Pi to Pi inline Radians GetClamped180() const { Radians r( m_value ); r.Clamp180(); return r; } // Inverts angle between [0;2Pi] and [-2Pi;0] inline void Invert() { Clamp360(); float const delta = Math::TwoPi - Math::Abs( m_value ); m_value = ( m_value < 0 ) ? delta : -delta; } // Inverts angle between [0;2Pi] and [-2Pi;0] inline Radians GetInverse() const { Radians r( m_value ); r.Invert(); return r; } // Flips the front and rear 180 degree arc i.e. 135 becomes -45, -90 becomes 90, etc. inline void Flip() { Clamp180(); float const delta = Math::Pi - Math::Abs( m_value ); m_value = ( m_value < 0 ) ? delta : -delta; } // Flips the front and rear 180 degree arc i.e. 135 becomes -45, -90 becomes 90, etc. inline Radians GetFlipped() const { Radians r( m_value ); r.Flip(); return r; } private: float m_value = 0; }; inline Degrees::Degrees( Radians const& radians ) : m_value( radians.ToDegrees() ) {} inline Radians Degrees::ToRadians() const { return Radians( m_value * Math::DegreesToRadians ); } inline Degrees::operator Radians() const { return ToRadians(); } inline bool Degrees::operator>( Radians const& rhs ) const { return m_value > rhs.ToDegrees().m_value; } inline bool Degrees::operator<( Radians const& rhs ) const { return m_value < rhs.ToDegrees().m_value; } inline bool Degrees::operator>=( Radians const& rhs ) const { return m_value >= rhs.ToDegrees().m_value; } inline bool Degrees::operator<=( Radians const& rhs ) const { return m_value <= rhs.ToDegrees().m_value; } inline bool Degrees::operator==( Radians const& rhs ) const { return Math::IsNearEqual( m_value, rhs.ToDegrees().m_value ); } inline bool Degrees::operator!=( Radians const& rhs ) const { return !Math::IsNearEqual( m_value, rhs.ToDegrees().m_value ); } inline Radians::Radians( Degrees const& degrees ) : m_value( degrees.ToRadians() ) {} inline bool Radians::operator>( Degrees const& rhs ) const { return m_value > rhs.ToRadians().m_value; } inline bool Radians::operator<( Degrees const& rhs ) const { return m_value < rhs.ToRadians().m_value; } inline bool Radians::operator>=( Degrees const& rhs ) const { return m_value >= rhs.ToRadians().m_value; } inline bool Radians::operator<=( Degrees const& rhs ) const { return m_value <= rhs.ToRadians().m_value; } inline bool Radians::operator==( Degrees const& rhs ) const { return Math::IsNearEqual( m_value, rhs.ToRadians().m_value ); } inline bool Radians::operator!=( Degrees const& rhs ) const { return !Math::IsNearEqual( m_value, rhs.ToRadians().m_value ); } struct EulerAngles { public: EulerAngles() = default; inline explicit EulerAngles( Degrees inX, Degrees inY, Degrees inZ ) : m_x( inX ) , m_y( inY ) , m_z( inZ ) {} inline explicit EulerAngles( Radians inX, Radians inY, Radians inZ ) : m_x( inX ) , m_y( inY ) , m_z( inZ ) {} inline explicit EulerAngles( float inDegreesX, float inDegreesY, float inDegreesZ ) : m_x( Math::DegreesToRadians * inDegreesX ) , m_y( Math::DegreesToRadians * inDegreesY ) , m_z( Math::DegreesToRadians * inDegreesZ ) {} inline EulerAngles( Float3 const& anglesInDegrees ) : m_x( Math::DegreesToRadians * anglesInDegrees.m_x ) , m_y( Math::DegreesToRadians * anglesInDegrees.m_y ) , m_z( Math::DegreesToRadians * anglesInDegrees.m_z ) {} inline void Clamp() { m_x.Clamp360(); m_y.Clamp360(); m_z.Clamp360(); } inline EulerAngles GetClamped() const { EulerAngles clamped = *this; clamped.Clamp(); return clamped; } inline Radians GetYaw() const { return m_z; } inline Radians GetPitch() const { return m_x; } inline Radians GetRoll() 
const { return m_y; } inline Float3 GetAsRadians() const { return Float3( m_x.ToFloat(), m_y.ToFloat(), m_z.ToFloat() ); } inline Float3 GetAsDegrees() const { return Float3( m_x.ToDegrees().ToFloat(), m_y.ToDegrees().ToFloat(), m_z.ToDegrees().ToFloat() ); } inline bool operator==( EulerAngles const& other ) const { return m_x == other.m_x && m_y == other.m_y && m_z == other.m_z; } inline bool operator!=( EulerAngles const& other ) const { return m_x != other.m_x || m_y != other.m_y || m_z != other.m_z; } inline Radians& operator[]( uint32_t i ) { return ( (Radians*) this )[i]; } inline Radians const& operator[]( uint32_t i ) const { return ( (Radians*) this )[i]; } // in degrees inline Float3 ToFloat3() const { return Float3( Math::RadiansToDegrees * m_x.ToFloat(), Math::RadiansToDegrees * m_y.ToFloat(), Math::RadiansToDegrees * m_z.ToFloat() ); } public: Radians m_x = 0.0f; Radians m_y = 0.0f; Radians m_z = 0.0f; }; struct AxisAngle { public: inline AxisAngle() = default; inline explicit AxisAngle( Float3 axis, Radians angle ) : m_axis( axis ), m_angle( angle ) {} inline explicit AxisAngle( Float3 axis, Degrees angle ) : m_axis( axis ), m_angle( angle.ToRadians() ) {} inline bool IsValid() const { float const lengthSq = m_axis.m_x * m_axis.m_x + m_axis.m_y * m_axis.m_y + m_axis.m_z * m_axis.m_z; return Math::Abs( lengthSq - 1.0f ) < Math::Epsilon; } public: Float3 m_axis = Float3::Zero; Radians m_angle = Radians( 0.0f ); }; ================================================ FILE: MotionCorrection/src/cpp/Math/Vector.cpp ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #include "Vector.h" #include "Quaternion.h" namespace Math { Vector const Vector::UnitX = { 1, 0, 0, 0 }; Vector const Vector::UnitY = { 0, 1, 0, 0 }; Vector const Vector::UnitZ = { 0, 0, 1, 0 }; Vector const Vector::UnitW = { 0, 0, 0, 1 }; Vector const Vector::Origin = { 0, 0, 0, 1 }; Vector const Vector::WorldForward = { 0, -1, 0, 0 }; Vector const Vector::WorldBackward = { 0, 1, 0, 0 }; Vector const Vector::WorldUp = { 0, 0, 1, 0 }; Vector const Vector::WorldDown = { 0, 0, -1, 0 }; Vector const Vector::WorldLeft = { 1, 0, 0, 0 }; Vector const Vector::WorldRight = { -1, 0, 0, 0 }; Vector const Vector::Infinity = _mm_castsi128_ps( _mm_set1_epi32( 0x7F800000 ) ); /* +inf in every lane (IEEE-754 bit pattern; the float constructor would store the finite value 2139095040.0f) */ Vector const Vector::QNaN = _mm_castsi128_ps( _mm_set1_epi32( 0x7FC00000 ) ); /* quiet NaN in every lane (IEEE-754 bit pattern) */ Vector const Vector::NegativeOne(-1.0f); Vector const Vector::Zero(0.0f); Vector const Vector::Half(0.5f); Vector const Vector::One(1.0f); Vector const Vector::Epsilon(Math::Epsilon); Vector const Vector::LargeEpsilon(Math::LargeEpsilon); Vector const Vector::OneMinusEpsilon(1.0f - Math::Epsilon); Vector const Vector::EpsilonMinusOne(Math::Epsilon - 1.0f); Vector const Vector::NormalizeCheckThreshold(0.01f); // Squared Error Vector const Vector::Pi(Math::Pi); Vector const Vector::PiDivTwo(Math::PiDivTwo); Vector const Vector::TwoPi(Math::TwoPi); Vector const Vector::OneDivTwoPi(Math::OneDivTwoPi); Vector const Vector::Select0000(0, 0, 0, 0); Vector const Vector::Select0001(0, 0, 0, 1); Vector const Vector::Select0010(0, 0, 1, 0); Vector const Vector::Select0011(0, 0, 1, 1); Vector const Vector::Select0100(0, 1, 0, 0); Vector const Vector::Select0101(0, 1, 0, 1); Vector const Vector::Select0110(0, 1, 1, 0); Vector const Vector::Select0111(0, 1, 1, 1); Vector const Vector::Select1000(1, 0, 0, 0); Vector const Vector::Select1001(1, 0, 0, 1); Vector
const Vector::Select1010(1, 0, 1, 0); Vector const Vector::Select1011(1, 0, 1, 1); Vector const Vector::Select1100(1, 1, 0, 0); Vector const Vector::Select1101(1, 1, 0, 1); Vector const Vector::Select1110(1, 1, 1, 0); Vector const Vector::Select1111(1, 1, 1, 1); Vector const Vector::BoxCorners[8] = { { -1.0f, -1.0f, 1.0f, 0.0f }, { 1.0f, -1.0f, 1.0f, 0.0f }, { 1.0f, 1.0f, 1.0f, 0.0f }, { -1.0f, 1.0f, 1.0f, 0.0f }, { -1.0f, -1.0f, -1.0f, 0.0f }, { 1.0f, -1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f, -1.0f, 0.0f }, { -1.0f, 1.0f, -1.0f, 0.0f }, }; Vector Vector::SLerp(const Vector& from, const Vector& to, float t) { ASSERT(t >= 0.0f && t <= 1.0f); if (from.LengthSquared3().IsLessThan4(Epsilon) || to.LengthSquared3().IsLessThan4(Epsilon)) { return Lerp(from, to, t); } // Calculate the final length const Vector fromLength = from.Length3(); const Vector toLength = to.Length3(); const Vector finalLength = Lerp(fromLength, toLength, t); // Normalize vectors const Vector normalizedFrom = from / fromLength; const Vector normalizedTo = to / toLength; // Handle parallel vector Vector result; if (normalizedFrom.IsParallelTo(normalizedTo)) { result = normalizedFrom; } else { // Interpolate the rotation between the vectors const Vector dot = Dot3(normalizedFrom, normalizedTo); const Vector angle = ACos(dot); const Vector axis = Cross3(normalizedFrom, normalizedTo).Normalize3(); const Vector interpolatedAngle = Lerp(Zero, angle, t); const Quaternion rotation(axis, Radians(interpolatedAngle.ToFloat())); const Vector finalDirection = rotation.RotateVector(normalizedFrom); result = finalDirection.GetNormalized3() * finalLength; } return result; } } ================================================ FILE: MotionCorrection/src/cpp/Math/Vector.h ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Compiler.h" #include "Types.h" #include "Constants.h" #include "SIMD.h" namespace Math { class alignas(16) Vector { public: static Vector const UnitX; static Vector const UnitY; static Vector const UnitZ; static Vector const UnitW; static Vector const Origin; static Vector const WorldForward; static Vector const WorldBackward; static Vector const WorldUp; static Vector const WorldDown; static Vector const WorldLeft; static Vector const WorldRight; static Vector const NegativeOne; static Vector const Zero; static Vector const Half; static Vector const One; static Vector const Epsilon; static Vector const LargeEpsilon; static Vector const OneMinusEpsilon; static Vector const EpsilonMinusOne; static Vector const NormalizeCheckThreshold; static Vector const Pi; static Vector const PiDivTwo; static Vector const TwoPi; static Vector const OneDivTwoPi; static Vector const Select0000; static Vector const Select0001; static Vector const Select0010; static Vector const Select0011; static Vector const Select0100; static Vector const Select0101; static Vector const Select0110; static Vector const Select0111; static Vector const Select1000; static Vector const Select1001; static Vector const Select1010; static Vector const Select1011; static Vector const Select1100; static Vector const Select1101; static Vector const Select1110; static Vector const Select1111; static Vector const Infinity; static Vector const QNaN; static Vector const BoxCorners[8]; // // Utils // static Vector Cross2(const Vector& v0, const Vector& v1); static Vector Cross3(const Vector& v0, const Vector& v1); static Vector Dot2(const Vector& v0, const Vector& v1); static Vector Dot3(const Vector& v0, const Vector& v1); static Vector Dot4(const Vector& v0, const Vector& v1); static Vector Average2(const Vector& v0, const Vector& v1); static Vector Average3(const Vector& v0, const Vector& v1); static Vector Average4(const Vector& v0, const Vector& v1); static Vector Min(const Vector& v0, const Vector& v1); static Vector Max(const Vector& v0, const Vector& v1); static float Min(const Vector& v); static float Max(const Vector& v); static Vector Clamp(const Vector& v, const Vector& min, const Vector& max); static Vector Xor(const Vector& vec0, const Vector& vec1); // Add the multiplied results to a vector: ( vec * mul ) + addend static Vector MultiplyAdd(const Vector& vec, const Vector& multiplier, const Vector& addend); // Subtract a vector from the multiplied result: (vec * mul ) - subtrahend static Vector MultiplySubtract(const Vector& vec, const Vector& multiplier, const Vector& subtrahend); // Subtract the multiplied result from a vector: minuend - (vec * mul ) static Vector NegativeMultiplySubtract(const Vector& vec, const Vector& multiplier, const Vector& minuend); // Sum up scaled versions of two vectors static Vector LinearCombination(const Vector& v0, const Vector& v1, float scale0, float scale1); // Linear interpolation of one vector to another static Vector Lerp(const Vector& from, const Vector& to, float t); // Normalized linear interpolation of one vector to another static Vector NLerp(const Vector& from, const Vector& to, float t); // Spherical interpolation of one vector to another static Vector SLerp(const Vector& from, const Vector& to, float t); // Combine the two vectors based on the control: 0 means select from v0, 1 means select from v1. E.G. 
To select XY from v0 and ZW from v1, control = Vector( 0, 0, 1, 1 ) static Vector Select(const Vector& v0, const Vector& v1, const Vector& control); // Get a permutation of two vectors, each template argument represents the element index to select ( v0: 0-3, v1: 4-7 ) template <uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW> static Vector Permute(const Vector& v0, const Vector& v1); // // Trigonometry // static Vector Sin(const Vector& vec); static Vector Cos(const Vector& vec); static Vector Tan(const Vector& vec); static Vector ASin(const Vector& vec); static Vector ACos(const Vector& vec); static Vector ATan(const Vector& vec); static Vector ATan2(const Vector& vec0, const Vector& vec1); static Vector SinEst(const Vector& vec); static Vector CosEst(const Vector& vec); static Vector TanEst(const Vector& vec); static Vector ASinEst(const Vector& vec); static Vector ACosEst(const Vector& vec); static Vector ATanEst(const Vector& vec); static Vector ATan2Est(const Vector& vec0, const Vector& vec1); static void SinCos(Vector& sin, Vector& cos, float angle); static void SinCos(Vector& sin, Vector& cos, const Vector& angle); static Vector AngleMod2Pi(const Vector& angles); public: operator __m128& (); operator const __m128& () const; Vector(); explicit Vector(Axis axis); explicit Vector(ZeroInit_t); explicit Vector(float v); Vector(__m128 v); Vector(float ix, float iy, float iz, float iw = 1.0f); Vector(const Float2& v, float iz = 0.0f, float iw = 0.0f); Vector(const Float3& v, float iw = 1.0f); Vector(const Float4& v); Vector(const float* pValues); bool IsValid() const; void Store(float* pValues) const; void StoreFloat(float& value) const; void StoreFloat2(Float2& value) const; void StoreFloat3(Float3& value) const; void StoreFloat4(Float4& value) const; float ToFloat() const; Float2 ToFloat2() const; Float3 ToFloat3() const; Float4 ToFloat4() const; operator Float2() const; operator Float3() const; operator Float4() const; // // Element accessors // float GetX() const; float GetY() const; float GetZ() const; float GetW() const; void SetX(float x); void SetY(float y); void SetZ(float z); void SetW(float w); float operator[](uint32_t i) const; // // W component operations // bool IsW1() const; bool IsW0() const; Vector& SetW0(); Vector& SetW1(); Vector GetWithW0() const; Vector GetWithW1() const; // // Dimensional Getters // // Returns only the first two components, z=w=0 Vector Get2D() const; // Returns only the first three components, w = 0 Vector Get3D() const; // // Algebraic operators // Vector operator+(const Vector& v) const; Vector& operator+=(const Vector& v); Vector operator-(const Vector& v) const; Vector& operator-=(const Vector& v); Vector operator*(const Vector& v) const; Vector& operator*=(const Vector& v); Vector operator/(const Vector& v) const; Vector& operator/=(const Vector& v); Vector operator*(float const f) const; Vector& operator*=(float const f); Vector operator/(float const f) const; Vector& operator/=(float const f); Vector operator-() const; Vector Orthogonal2D() const; Vector Cross2(const Vector& other) const; Vector Cross3(const Vector& other) const; Vector Dot2(const Vector& other) const; Vector Dot3(const Vector& other) const; Vector Dot4(const Vector& other) const; float GetDot2(const Vector& other) const; float GetDot3(const Vector& other) const; float GetDot4(const Vector& other) const; Vector ScalarProjection(const Vector& other) const; float GetScalarProjection(const Vector& other) const; Vector VectorProjection(const Vector& other) const; // // Transformations // Vector& Invert(); Vector GetInverse()
const; Vector GetReciprocal() const; Vector& InvertEst(); Vector GetInverseEst() const; Vector& Negate(); Vector GetNegated() const; Vector& Abs(); Vector GetAbs() const; Vector& Sqrt(); Vector GetSqrt(); Vector& ReciprocalSqrt(); Vector GetReciprocalSqrt(); Vector& EstimatedReciprocalSqrt(); Vector GetEstimatedReciprocalSqrt(); Vector& Normalize2(); Vector& Normalize3(); Vector& Normalize4(); Vector GetNormalized2() const; Vector GetNormalized3() const; Vector GetNormalized4() const; Vector& Floor(); Vector GetFloor() const; Vector& Ceil(); Vector GetCeil() const; Vector& Round(); Vector GetRound() const; Vector GetSign() const; // // Permutations // Vector GetSplatX() const; Vector GetSplatY() const; Vector GetSplatZ() const; Vector GetSplatW() const; // Get a shuffled version of this vector, each template argument represents the element index in the original vector template <uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW> Vector Swizzle() const; // Get a shuffled version of this vector, each argument represents the element index in the original vector Vector Swizzle(uint32_t xIdx, uint32_t yIdx, uint32_t zIdx, uint32_t wIdx) const; // Get a shuffled version of this vector, each argument represents the element index in the original vector Vector Shuffle(uint32_t xIdx, uint32_t yIdx, uint32_t zIdx, uint32_t wIdx) const; // Get a shuffled version of this vector, each template argument represents the element index in the original vector template <uint32_t ShuffleX, uint32_t ShuffleY, uint32_t ShuffleZ, uint32_t ShuffleW> Vector Shuffle() const; // // Queries // Vector Length2() const; Vector Length3() const; Vector Length4() const; float GetLength2() const; float GetLength3() const; float GetLength4() const; Vector InverseLength2() const; Vector InverseLength3() const; Vector InverseLength4() const; float GetInverseLength2() const; float GetInverseLength3() const; float GetInverseLength4() const; Vector LengthSquared2() const; Vector LengthSquared3() const; Vector LengthSquared4() const; float GetLengthSquared2() const; float GetLengthSquared3() const; float GetLengthSquared4() const; Vector Distance2(const Vector& to) const; Vector Distance3(const Vector& to) const; Vector Distance4(const Vector& to) const; float GetDistance2(const Vector& to) const; float GetDistance3(const Vector& to) const; float GetDistance4(const Vector& to) const; Vector DistanceSquared2(const Vector& to) const; Vector DistanceSquared3(const Vector& to) const; Vector DistanceSquared4(const Vector& to) const; float GetDistanceSquared2(const Vector& to) const; float GetDistanceSquared3(const Vector& to) const; float GetDistanceSquared4(const Vector& to) const; bool IsNormalized2() const; bool IsNormalized3() const; bool IsNormalized4() const; // Is this vector within the range [-bounds, bounds] Vector InBounds(const Vector& bounds) const; bool IsInBounds2(const Vector& bounds) const; bool IsInBounds3(const Vector& bounds) const; bool IsInBounds4(const Vector& bounds) const; Vector Equal(const Vector& v) const; bool IsEqual2(const Vector& v) const; bool IsEqual3(const Vector& v) const; bool IsEqual4(const Vector& v) const; Vector NearEqual(const Vector& v, const Vector& epsilon) const; bool IsNearEqual2(const Vector& v, float epsilon) const; bool IsNearEqual3(const Vector& v, float epsilon) const; bool IsNearEqual4(const Vector& v, float epsilon) const; bool IsNearEqual2(const Vector& v, const Vector& epsilon = Vector::Epsilon) const; bool IsNearEqual3(const Vector& v, const Vector& epsilon = Vector::Epsilon) const; bool IsNearEqual4(const Vector& v, const Vector& epsilon = Vector::Epsilon) const; Vector GreaterThan(const Vector& v) const; bool
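// Each comparison query comes in two flavors: the Vector-returning form yields
// a per-lane comparison mask, while the bool-returning Is* forms reduce that
// mask to a single result over the first 2, 3, or 4 lanes.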
IsAnyGreaterThan(const Vector& v) const; bool IsGreaterThan2(const Vector& v) const; bool IsGreaterThan3(const Vector& v) const; bool IsGreaterThan4(const Vector& v) const; Vector GreaterThanEqual(const Vector& v) const; bool IsAnyGreaterThanEqual(const Vector& v) const; bool IsGreaterThanEqual2(const Vector& v) const; bool IsGreaterThanEqual3(const Vector& v) const; bool IsGreaterThanEqual4(const Vector& v) const; Vector LessThan(const Vector& v) const; bool IsAnyLessThan(const Vector& v) const; bool IsLessThan2(const Vector& v) const; bool IsLessThan3(const Vector& v) const; bool IsLessThan4(const Vector& v) const; Vector LessThanEqual(const Vector& v) const; bool IsAnyLessThanEqual(const Vector& v) const; bool IsLessThanEqual2(const Vector& v) const; bool IsLessThanEqual3(const Vector& v) const; bool IsLessThanEqual4(const Vector& v) const; Vector EqualsZero() const; bool IsAnyEqualToZero2() const; bool IsAnyEqualToZero3() const; bool IsAnyEqualToZero4() const; bool IsZero2() const; bool IsZero3() const; bool IsZero4() const; Vector NearEqualsZero(float epsilon = Math::Epsilon) const; bool IsNearZero2(float epsilon = Math::Epsilon) const; bool IsNearZero3(float epsilon = Math::Epsilon) const; bool IsNearZero4(float epsilon = Math::Epsilon) const; Vector EqualsInfinity() const; bool IsInfinite2() const; bool IsInfinite3() const; bool IsInfinite4() const; Vector EqualsNaN() const; bool IsNaN2() const; bool IsNaN3() const; bool IsNaN4() const; bool IsParallelTo(const Vector& v) const; void ToDirectionAndLength2(Vector& direction, float& length) const; void ToDirectionAndLength3(Vector& direction, float& length) const; bool operator==(const Vector& rhs) const; bool operator!=(const Vector& rhs) const; public: __m128 m_data; }; static_assert(sizeof(Vector) == 16, "Vector size must be 16 bytes!"); } #include "Vector.inl" ================================================ FILE: MotionCorrection/src/cpp/Math/Vector.inl ================================================ /* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ #pragma once #include <cstring> /* memset */ #include "Vector.h" namespace Math { FORCE_INLINE Vector Vector::Cross2(const Vector& v0, const Vector& v1) { return v0.Cross2(v1); } FORCE_INLINE Vector Vector::Cross3(const Vector& v0, const Vector& v1) { return v0.Cross3(v1); } FORCE_INLINE Vector Vector::Dot2(const Vector& v0, const Vector& v1) { return v0.Dot2(v1); } FORCE_INLINE Vector Vector::Dot3(const Vector& v0, const Vector& v1) { return v0.Dot3(v1); } FORCE_INLINE Vector Vector::Dot4(const Vector& v0, const Vector& v1) { return v0.Dot4(v1); } FORCE_INLINE Vector Vector::Average2(const Vector& v0, const Vector& v1) { auto avg4 = Average4(v0, v1); return Vector::Select(avg4, Vector::Zero, Vector(0, 0, 1, 1)); } FORCE_INLINE Vector Vector::Average3(const Vector& v0, const Vector& v1) { auto avg4 = Average4(v0, v1); return Vector::Select(avg4, Vector::Zero, Vector(0, 0, 0, 1)); } FORCE_INLINE Vector Vector::Average4(const Vector& v0, const Vector& v1) { return (v0 + v1) * Vector::Half; } FORCE_INLINE Vector Vector::Min(const Vector& v0, const Vector& v1) { Vector result; result = _mm_min_ps(v0, v1); return result; } FORCE_INLINE Vector Vector::Max(const Vector& v0, const Vector& v1) { Vector result; result = _mm_max_ps(v0, v1); return result; } FORCE_INLINE float Vector::Min(const Vector& v) { __m128 shufReg, sumsReg; shufReg = _mm_movehdup_ps(v); sumsReg = _mm_min_ps(v, shufReg); shufReg = _mm_movehl_ps(shufReg, sumsReg); sumsReg = _mm_min_ss(sumsReg, shufReg); return _mm_cvtss_f32(sumsReg); } FORCE_INLINE float Vector::Max(const Vector& v) { __m128 shufReg, sumsReg; shufReg = _mm_movehdup_ps(v); sumsReg = _mm_max_ps(v, shufReg); shufReg = _mm_movehl_ps(shufReg, sumsReg); sumsReg = _mm_max_ss(sumsReg, shufReg); return _mm_cvtss_f32(sumsReg); } FORCE_INLINE Vector Vector::Clamp(const Vector& v, const Vector& min, const Vector& max) { Vector result; result = _mm_max_ps(min, v); result = _mm_min_ps(result, max); return result; } FORCE_INLINE Vector Vector::Xor(const Vector& v0, const Vector& v1) { __m128i V = _mm_xor_si128(_mm_castps_si128(v0), _mm_castps_si128(v1)); Vector result; result = _mm_castsi128_ps(V); return result; } FORCE_INLINE Vector Vector::MultiplyAdd(const Vector& v, const Vector& multiplier, const Vector& addend) { // result = addend + ( vec * multiplier ) Vector result; result = _mm_mul_ps(v, multiplier); result = _mm_add_ps(result, addend); return result; } FORCE_INLINE Vector Vector::MultiplySubtract(const Vector& vec, const Vector& multiplier, const Vector& subtrahend) { // result = ( vec * multiplier ) - subtrahend auto r = _mm_mul_ps(vec, multiplier); return _mm_sub_ps(r, subtrahend); } FORCE_INLINE Vector Vector::NegativeMultiplySubtract(const Vector& vec, const Vector& multiplier, const Vector& minuend) { // result = minuend - ( vec * multiplier ) auto r = _mm_mul_ps(vec, multiplier); return _mm_sub_ps(minuend, r); } FORCE_INLINE Vector Vector::LinearCombination(const Vector& v0, const Vector& v1, float scale0, float scale1) { return (v0 * scale0) + (v1 * scale1); } FORCE_INLINE Vector Vector::Lerp(const Vector& from, const Vector& to, float t) { ASSERT(t >= 0.0f && t <= 1.0f); Vector L = _mm_sub_ps(to, from); Vector S = _mm_set_ps1(t); Vector result; result = _mm_mul_ps(L, S); result = _mm_add_ps(result, from); return result; } FORCE_INLINE Vector Vector::NLerp(const Vector& from, const Vector& to, float t) { ASSERT(t >= 0.0f && t <= 1.0f); // Calculate the final length auto const fromLength = from.Length3(); auto const toLength = to.Length3();
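// NLerp interpolates direction and magnitude separately: the unit directions
// are lerped and renormalized below, then scaled by the linearly interpolated
// length, so the result keeps a sensible magnitude even when |from| != |to|.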
auto const finalLength = Vector::Lerp(fromLength, toLength, t); // Normalize vectors Vector const normalizedFrom = from / fromLength; Vector const normalizedTo = to / toLength; // LERP auto const finalDirection = Lerp(normalizedFrom, normalizedTo, t); auto result = finalDirection.GetNormalized3() * finalLength; return result; } FORCE_INLINE Vector Vector::Select(const Vector& v0, const Vector& v1, const Vector& control) { auto const ctrl = _mm_cmpneq_ps(control, Vector::Zero); Vector result; auto vTemp1 = _mm_andnot_ps(ctrl, v0); auto vTemp2 = _mm_and_ps(v1, ctrl); result = _mm_or_ps(vTemp1, vTemp2); return result; } template <uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW> FORCE_INLINE Vector Vector::Permute(const Vector& v0, const Vector& v1) { static_assert(PermuteX <= 7, "Element index parameter out of range"); static_assert(PermuteY <= 7, "Element index parameter out of range"); static_assert(PermuteZ <= 7, "Element index parameter out of range"); static_assert(PermuteW <= 7, "Element index parameter out of range"); uint32_t const shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3); bool const whichX = PermuteX > 3; bool const whichY = PermuteY > 3; bool const whichZ = PermuteZ > 3; bool const whichW = PermuteW > 3; static SIMD::UIntMask const selectMask = { whichX ? 0xFFFFFFFF : 0, whichY ? 0xFFFFFFFF : 0, whichZ ? 0xFFFFFFFF : 0, whichW ? 0xFFFFFFFF : 0 }; __m128 shuffled1 = _mm_shuffle_ps(v0, v0, shuffle); __m128 shuffled2 = _mm_shuffle_ps(v1, v1, shuffle); __m128 masked1 = _mm_andnot_ps(selectMask, shuffled1); __m128 masked2 = _mm_and_ps(selectMask, shuffled2); return _mm_or_ps(masked1, masked2); } FORCE_INLINE Vector Vector::Sin(const Vector& vec) { // Force the value within the bounds of pi auto m_x = Vector::AngleMod2Pi(vec); // Map in [-pi/2,pi/2] with sin(m_y) = sin(m_x). __m128 sign = _mm_and_ps(m_x, SIMD::g_signMask); __m128 c = _mm_or_ps(Vector::Pi, sign); // pi when m_x >= 0, -pi when m_x < 0 __m128 absx = _mm_andnot_ps(sign, m_x); // |m_x| __m128 rflx = _mm_sub_ps(c, m_x); __m128 comp = _mm_cmple_ps(absx, Vector::PiDivTwo); __m128 select0 = _mm_and_ps(comp, m_x); __m128 select1 = _mm_andnot_ps(comp, rflx); m_x = _mm_or_ps(select0, select1); __m128 x2 = _mm_mul_ps(m_x, m_x); // Compute polynomial approximation const auto SC1 = SIMD::g_sinCoefficients1; auto vConstants = _mm_shuffle_ps(SC1, SC1, _MM_SHUFFLE(0, 0, 0, 0)); __m128 Result = _mm_mul_ps(vConstants, x2); const auto SC0 = SIMD::g_sinCoefficients0; vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(3, 3, 3, 3)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(0, 0, 0, 0)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); Result = _mm_add_ps(Result, Vector::One); Result = _mm_mul_ps(Result, m_x); return Result; } FORCE_INLINE Vector Vector::Cos(const Vector& vec) { // Map V to m_x in [-pi,pi]. auto m_x = Vector::AngleMod2Pi(vec); // Map in [-pi/2,pi/2] with cos(m_y) = sign*cos(m_x).
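// Reflection step: when |m_x| > pi/2 the reduced argument rflx = c - m_x lies
// back in [-pi/2, pi/2] and cos(m_x) = -cos(rflx), so the polynomial is
// evaluated on the reduced argument and the result is negated through 'sign'.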
auto sign = _mm_and_ps(m_x, SIMD::g_signMask); __m128 c = _mm_or_ps(Vector::Pi, sign); // pi when m_x >= 0, -pi when m_x < 0 __m128 absx = _mm_andnot_ps(sign, m_x); // |m_x| __m128 rflx = _mm_sub_ps(c, m_x); __m128 comp = _mm_cmple_ps(absx, Vector::PiDivTwo); __m128 select0 = _mm_and_ps(comp, m_x); __m128 select1 = _mm_andnot_ps(comp, rflx); m_x = _mm_or_ps(select0, select1); select0 = _mm_and_ps(comp, Vector::One); select1 = _mm_andnot_ps(comp, Vector::NegativeOne); sign = _mm_or_ps(select0, select1); __m128 x2 = _mm_mul_ps(m_x, m_x); // Compute polynomial approximation const auto CC1 = SIMD::g_cosCoefficients1; auto vConstants = _mm_shuffle_ps(CC1, CC1, _MM_SHUFFLE(0, 0, 0, 0)); __m128 Result = _mm_mul_ps(vConstants, x2); const auto CC0 = SIMD::g_cosCoefficients0; vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(3, 3, 3, 3)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(0, 0, 0, 0)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); Result = _mm_add_ps(Result, Vector::One); Result = _mm_mul_ps(Result, sign); return Result; } FORCE_INLINE Vector Vector::Tan(const Vector& vec) { static const Vector tanCoefficients0 = { 1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f }; static const Vector tanCoefficients1 = { 4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f }; static const Vector tanConstants = { 1.570796371f, 6.077100628e-11f, 0.000244140625f, 0.63661977228f /*2 / Pi*/ }; static const SIMD::UIntMask mask = { 0x1, 0x1, 0x1, 0x1 }; Vector TwoDivPi = tanConstants.GetSplatW(); Vector C0 = tanConstants.GetSplatX(); Vector C1 = tanConstants.GetSplatY(); Vector vEpsilon = tanConstants.GetSplatZ(); Vector VA = (vec * TwoDivPi).Round(); Vector VC = Vector::NegativeMultiplySubtract(VA, C0, vec); Vector VB = VA.GetAbs(); VC = Vector::NegativeMultiplySubtract(VA, C1, VC); reinterpret_cast<__m128i*>(&VB)[0] = _mm_cvttps_epi32(VB); Vector VC2 = VC * VC; Vector T7 = tanCoefficients1.GetSplatW(); Vector T6 = tanCoefficients1.GetSplatZ(); Vector T4 = tanCoefficients1.GetSplatX(); Vector T3 = tanCoefficients0.GetSplatW(); Vector T5 = tanCoefficients1.GetSplatY(); Vector T2 = tanCoefficients0.GetSplatZ(); Vector T1 = tanCoefficients0.GetSplatY(); Vector T0 = tanCoefficients0.GetSplatX(); Vector VBIsEven = _mm_and_ps(VB, mask); VBIsEven = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(VBIsEven), _mm_castps_si128(Vector::Zero))); Vector N = Vector::MultiplyAdd(VC2, T7, T6); Vector D = Vector::MultiplyAdd(VC2, T4, T3); N = Vector::MultiplyAdd(VC2, N, T5); D = Vector::MultiplyAdd(VC2, D, T2); N = VC2 * N; D = Vector::MultiplyAdd(VC2, D, T1); N = Vector::MultiplyAdd(VC, N, VC); Vector VCNearZero = VC.InBounds(vEpsilon); D = Vector::MultiplyAdd(VC2, D, T0); N = Vector::Select(N, VC, VCNearZero); D = Vector::Select(D, Vector::One, VCNearZero); Vector R0 = N.GetNegated(); Vector R1 = N / D; R0 = D / R0; Vector VIsZero = vec.EqualsZero(); Vector Result = Vector::Select(R0, R1, VBIsEven); Result = Vector::Select(Result, Zero, VIsZero); return Result; } FORCE_INLINE Vector Vector::ASin(const Vector& vec) { __m128 nonnegative = _mm_cmpge_ps(vec, Vector::Zero); __m128 mvalue = _mm_sub_ps(Vector::Zero, vec); __m128 
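// ASin is computed through the arc-cosine fit: t0 below ends up approximating
// acos(vec) (sign handled by the 'nonnegative' mask), and the final
// pi/2 - t0 applies the identity asin(x) = pi/2 - acos(x).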
m_x = _mm_max_ps(vec, mvalue); // |vec| // Compute (1-|vec|), clamp to zero to avoid sqrt of negative number. __m128 oneMValue = _mm_sub_ps(Vector::One, m_x); __m128 clampOneMValue = _mm_max_ps(Vector::Zero, oneMValue); __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|vec|) // Compute polynomial approximation const auto AC1 = SIMD::g_arcCoefficients1; auto vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(3, 3, 3, 3)); __m128 t0 = _mm_mul_ps(vConstants, m_x); vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(2, 2, 2, 2)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(1, 1, 1, 1)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(0, 0, 0, 0)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); const auto AC0 = SIMD::g_arcCoefficients0; vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(3, 3, 3, 3)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(2, 2, 2, 2)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(1, 1, 1, 1)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(0, 0, 0, 0)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, root); __m128 t1 = _mm_sub_ps(Vector::Pi, t0); t0 = _mm_and_ps(nonnegative, t0); t1 = _mm_andnot_ps(nonnegative, t1); t0 = _mm_or_ps(t0, t1); t0 = _mm_sub_ps(Vector::PiDivTwo, t0); return t0; } FORCE_INLINE Vector Vector::ACos(const Vector& vec) { __m128 nonnegative = _mm_cmpge_ps(vec, Vector::Zero); __m128 mvalue = _mm_sub_ps(Vector::Zero, vec); __m128 m_x = _mm_max_ps(vec, mvalue); // |vec| // Compute (1-|vec|), clamp to zero to avoid sqrt of negative number. 
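// acos(|x|) is approximated as sqrt(1 - |x|) * P(|x|) with the eight
// coefficients in g_arcCoefficients0/1; negative inputs are recovered from
// acos(-x) = pi - acos(x) via the 'nonnegative' mask at the end.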
__m128 oneMValue = _mm_sub_ps(Vector::One, m_x); __m128 clampOneMValue = _mm_max_ps(Vector::Zero, oneMValue); __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|vec|) // Compute polynomial approximation const auto AC1 = SIMD::g_arcCoefficients1; auto vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(3, 3, 3, 3)); __m128 t0 = _mm_mul_ps(vConstants, m_x); vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(2, 2, 2, 2)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(1, 1, 1, 1)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC1, AC1, _MM_SHUFFLE(0, 0, 0, 0)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); const auto AC0 = SIMD::g_arcCoefficients0; vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(3, 3, 3, 3)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(2, 2, 2, 2)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(1, 1, 1, 1)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AC0, AC0, _MM_SHUFFLE(0, 0, 0, 0)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, root); __m128 t1 = _mm_sub_ps(Vector::Pi, t0); t0 = _mm_and_ps(nonnegative, t0); t1 = _mm_andnot_ps(nonnegative, t1); t0 = _mm_or_ps(t0, t1); return t0; } FORCE_INLINE Vector Vector::ATan(const Vector& vec) { __m128 absV = vec.GetAbs(); __m128 invV = _mm_div_ps(Vector::One, vec); __m128 comp = _mm_cmpgt_ps(vec, Vector::One); __m128 select0 = _mm_and_ps(comp, Vector::One); __m128 select1 = _mm_andnot_ps(comp, Vector::NegativeOne); __m128 sign = _mm_or_ps(select0, select1); comp = _mm_cmple_ps(absV, Vector::One); select0 = _mm_and_ps(comp, Vector::Zero); select1 = _mm_andnot_ps(comp, sign); sign = _mm_or_ps(select0, select1); select0 = _mm_and_ps(comp, vec); select1 = _mm_andnot_ps(comp, invV); __m128 m_x = _mm_or_ps(select0, select1); __m128 x2 = _mm_mul_ps(m_x, m_x); // Compute polynomial approximation Vector const TC1 = SIMD::g_aTanCoefficients1; Vector vConstants = _mm_shuffle_ps(TC1, TC1, _MM_SHUFFLE(3, 3, 3, 3)); __m128 Result = _mm_mul_ps(vConstants, x2); vConstants = _mm_shuffle_ps(TC1, TC1, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(TC1, TC1, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(TC1, TC1, _MM_SHUFFLE(0, 0, 0, 0)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); Vector const TC0 = SIMD::g_aTanCoefficients0; vConstants = _mm_shuffle_ps(TC0, TC0, _MM_SHUFFLE(3, 3, 3, 3)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(TC0, TC0, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(TC0, TC0, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(TC0, TC0, _MM_SHUFFLE(0, 0, 0, 0)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); Result = _mm_add_ps(Result, Vector::One); Result = _mm_mul_ps(Result, m_x); __m128 result1 = _mm_mul_ps(sign, Vector::PiDivTwo); result1 = _mm_sub_ps(result1, Result); comp = _mm_cmpeq_ps(sign, Vector::Zero); select0 = _mm_and_ps(comp, Result); select1 = _mm_andnot_ps(comp, result1); Result = 
_mm_or_ps(select0, select1); return Result; } FORCE_INLINE Vector Vector::ATan2(const Vector& Y, const Vector& X) { Vector ATanResultValid = Vector(SIMD::g_trueMask); Vector vPi = Vector(SIMD::g_aTan2Constants).GetSplatX(); Vector vPiOverTwo = Vector(SIMD::g_aTan2Constants).GetSplatY(); Vector vPiOverFour = Vector(SIMD::g_aTan2Constants).GetSplatZ(); Vector vThreePiOverFour = Vector(SIMD::g_aTan2Constants).GetSplatW(); Vector YEqualsZero = Y.EqualsZero(); Vector XEqualsZero = X.EqualsZero(); Vector XIsPositive = _mm_and_ps(X, SIMD::g_signMask); XIsPositive = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XIsPositive), _mm_castps_si128(Vector::Zero))); Vector YEqualsInfinity = Y.EqualsInfinity(); Vector XEqualsInfinity = X.EqualsInfinity(); Vector YSign = _mm_and_ps(Y, SIMD::g_signMask); vPi = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vPi), _mm_castps_si128(YSign))); vPiOverTwo = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vPiOverTwo), _mm_castps_si128(YSign))); vPiOverFour = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vPiOverFour), _mm_castps_si128(YSign))); vThreePiOverFour = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vThreePiOverFour), _mm_castps_si128(YSign))); Vector R1 = Vector::Select(vPi, YSign, XIsPositive); Vector R2 = Vector::Select(ATanResultValid, vPiOverTwo, XEqualsZero); Vector R3 = Vector::Select(R2, R1, YEqualsZero); Vector R4 = Vector::Select(vThreePiOverFour, vPiOverFour, XIsPositive); Vector R5 = Vector::Select(vPiOverTwo, R4, XEqualsInfinity); Vector Result = Vector::Select(R3, R5, YEqualsInfinity); ATanResultValid = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(Result), _mm_castps_si128(ATanResultValid))); Vector V = Y / X; Vector R0 = Vector::ATan(V); R1 = Vector::Select(vPi, Vector(SIMD::g_signMask), XIsPositive); R2 = R0 + R1; return Vector::Select(Result, R2, ATanResultValid); } FORCE_INLINE Vector Vector::SinEst(const Vector& vec) { // Force the value within the bounds of pi auto m_x = Vector::AngleMod2Pi(vec); // Map in [-pi/2,pi/2] with sin(m_y) = sin(m_x). __m128 sign = _mm_and_ps(m_x, SIMD::g_signMask); __m128 c = _mm_or_ps(Vector::Pi, sign); // pi when m_x >= 0, -pi when m_x < 0 __m128 absx = _mm_andnot_ps(sign, m_x); // |m_x| __m128 rflx = _mm_sub_ps(c, m_x); __m128 comp = _mm_cmple_ps(absx, Vector::PiDivTwo); __m128 select0 = _mm_and_ps(comp, m_x); __m128 select1 = _mm_andnot_ps(comp, rflx); m_x = _mm_or_ps(select0, select1); __m128 x2 = _mm_mul_ps(m_x, m_x); // Compute polynomial approximation const auto SEC = SIMD::g_sinCoefficients1; auto vConstants = _mm_shuffle_ps(SEC, SEC, _MM_SHUFFLE(3, 3, 3, 3)); __m128 Result = _mm_mul_ps(vConstants, x2); vConstants = _mm_shuffle_ps(SEC, SEC, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(SEC, SEC, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); Result = _mm_add_ps(Result, Vector::One); Result = _mm_mul_ps(Result, m_x); return Result; } FORCE_INLINE Vector Vector::CosEst(const Vector& vec) { // Map V to m_x in [-pi,pi]. auto m_x = Vector::AngleMod2Pi(vec); // Map in [-pi/2,pi/2] with cos(m_y) = sign*cos(m_x). 
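// Est variant: the same range reduction as Cos, but the polynomial keeps only
// three coefficients from g_cosCoefficients1 instead of the five terms used by
// Cos, trading accuracy for speed.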
auto sign = _mm_and_ps(m_x, SIMD::g_signMask); __m128 c = _mm_or_ps(Vector::Pi, sign); // pi when m_x >= 0, -pi when m_x < 0 __m128 absx = _mm_andnot_ps(sign, m_x); // |m_x| __m128 rflx = _mm_sub_ps(c, m_x); __m128 comp = _mm_cmple_ps(absx, Vector::PiDivTwo); __m128 select0 = _mm_and_ps(comp, m_x); __m128 select1 = _mm_andnot_ps(comp, rflx); m_x = _mm_or_ps(select0, select1); select0 = _mm_and_ps(comp, Vector::One); select1 = _mm_andnot_ps(comp, Vector::NegativeOne); sign = _mm_or_ps(select0, select1); __m128 x2 = _mm_mul_ps(m_x, m_x); // Compute polynomial approximation const auto CEC = SIMD::g_cosCoefficients1; auto vConstants = _mm_shuffle_ps(CEC, CEC, _MM_SHUFFLE(3, 3, 3, 3)); __m128 Result = _mm_mul_ps(vConstants, x2); vConstants = _mm_shuffle_ps(CEC, CEC, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(CEC, CEC, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); Result = _mm_add_ps(Result, Vector::One); Result = _mm_mul_ps(Result, sign); return Result; } FORCE_INLINE Vector Vector::TanEst(const Vector& vec) { Vector W = Vector(SIMD::g_tanEstCoefficients).GetSplatW(); Vector V1 = (vec * W).Round(); V1 = Vector::NegativeMultiplySubtract(Vector::Pi, V1, vec); Vector const T0 = Vector(SIMD::g_tanEstCoefficients).GetSplatX(); Vector const T1 = Vector(SIMD::g_tanEstCoefficients).GetSplatY(); Vector const T2 = Vector(SIMD::g_tanEstCoefficients).GetSplatZ(); auto V2T2 = Vector::NegativeMultiplySubtract(V1, V1, T2); auto V2 = V1 * V1; auto V1T0 = V1 * T0; auto V1T1 = V1 * T1; auto N = Vector::MultiplyAdd(V2, V1T1, V1T0); auto D = V2T2.GetInverseEst(); return N * D; } FORCE_INLINE Vector Vector::ASinEst(const Vector& vec) { __m128 nonnegative = _mm_cmpge_ps(vec, Vector::Zero); __m128 mvalue = _mm_sub_ps(Vector::Zero, vec); __m128 m_x = _mm_max_ps(vec, mvalue); // |vec| // Compute (1-|vec|), clamp to zero to avoid sqrt of negative number. __m128 oneMValue = _mm_sub_ps(Vector::One, m_x); __m128 clampOneMValue = _mm_max_ps(Vector::Zero, oneMValue); __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|vec|) // Compute polynomial approximation const auto AEC = SIMD::g_arcEstCoefficients; auto vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(3, 3, 3, 3)); __m128 t0 = _mm_mul_ps(vConstants, m_x); vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(2, 2, 2, 2)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(1, 1, 1, 1)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(0, 0, 0, 0)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, root); __m128 t1 = _mm_sub_ps(Vector::Pi, t0); t0 = _mm_and_ps(nonnegative, t0); t1 = _mm_andnot_ps(nonnegative, t1); t0 = _mm_or_ps(t0, t1); t0 = _mm_sub_ps(Vector::PiDivTwo, t0); return t0; } FORCE_INLINE Vector Vector::ACosEst(const Vector& vec) { __m128 nonnegative = _mm_cmpge_ps(vec, Vector::Zero); __m128 mvalue = _mm_sub_ps(Vector::Zero, vec); __m128 m_x = _mm_max_ps(vec, mvalue); // |vec| // Compute (1-|vec|), clamp to zero to avoid sqrt of negative number. 
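// Same reflection scheme as ACos (acos(-x) = pi - acos(x)), but with the
// shorter four-coefficient g_arcEstCoefficients fit in place of the full
// eight-coefficient polynomial.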
__m128 oneMValue = _mm_sub_ps(Vector::One, m_x); __m128 clampOneMValue = _mm_max_ps(Vector::Zero, oneMValue); __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|vec|) // Compute polynomial approximation auto vConstants = _mm_shuffle_ps(SIMD::g_arcEstCoefficients, SIMD::g_arcEstCoefficients, _MM_SHUFFLE(3, 3, 3, 3)); __m128 t0 = _mm_mul_ps(vConstants, m_x); vConstants = _mm_shuffle_ps(SIMD::g_arcEstCoefficients, SIMD::g_arcEstCoefficients, _MM_SHUFFLE(2, 2, 2, 2)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(SIMD::g_arcEstCoefficients, SIMD::g_arcEstCoefficients, _MM_SHUFFLE(1, 1, 1, 1)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, m_x); vConstants = _mm_shuffle_ps(SIMD::g_arcEstCoefficients, SIMD::g_arcEstCoefficients, _MM_SHUFFLE(0, 0, 0, 0)); t0 = _mm_add_ps(t0, vConstants); t0 = _mm_mul_ps(t0, root); __m128 t1 = _mm_sub_ps(Vector::Pi, t0); t0 = _mm_and_ps(nonnegative, t0); t1 = _mm_andnot_ps(nonnegative, t1); t0 = _mm_or_ps(t0, t1); return t0; } FORCE_INLINE Vector Vector::ATanEst(const Vector& vec) { __m128 absV = vec.GetAbs(); __m128 invV = _mm_div_ps(Vector::One, vec); __m128 comp = _mm_cmpgt_ps(vec, Vector::One); __m128 select0 = _mm_and_ps(comp, Vector::One); __m128 select1 = _mm_andnot_ps(comp, Vector::NegativeOne); __m128 sign = _mm_or_ps(select0, select1); comp = _mm_cmple_ps(absV, Vector::One); select0 = _mm_and_ps(comp, Vector::Zero); select1 = _mm_andnot_ps(comp, sign); sign = _mm_or_ps(select0, select1); select0 = _mm_and_ps(comp, vec); select1 = _mm_andnot_ps(comp, invV); __m128 m_x = _mm_or_ps(select0, select1); __m128 x2 = _mm_mul_ps(m_x, m_x); // Compute polynomial approximation Vector const AEC = SIMD::g_aTanEstCoefficients1; Vector vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(3, 3, 3, 3)); __m128 Result = _mm_mul_ps(vConstants, x2); vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(2, 2, 2, 2)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(1, 1, 1, 1)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); vConstants = _mm_shuffle_ps(AEC, AEC, _MM_SHUFFLE(0, 0, 0, 0)); Result = _mm_add_ps(Result, vConstants); Result = _mm_mul_ps(Result, x2); // ATanEstCoefficients0 is already splatted Result = _mm_add_ps(Result, SIMD::g_aTanEstCoefficients0); Result = _mm_mul_ps(Result, m_x); __m128 result1 = _mm_mul_ps(sign, Vector::PiDivTwo); result1 = _mm_sub_ps(result1, Result); comp = _mm_cmpeq_ps(sign, Vector::Zero); select0 = _mm_and_ps(comp, Result); select1 = _mm_andnot_ps(comp, result1); Result = _mm_or_ps(select0, select1); return Result; } FORCE_INLINE Vector Vector::ATan2Est(const Vector& Y, const Vector& X) { Vector ATanResultValid = Vector(SIMD::g_trueMask); Vector vPi = Vector(SIMD::g_aTan2Constants).GetSplatX(); Vector vPiOverTwo = Vector(SIMD::g_aTan2Constants).GetSplatY(); Vector vPiOverFour = Vector(SIMD::g_aTan2Constants).GetSplatZ(); Vector vThreePiOverFour = Vector(SIMD::g_aTan2Constants).GetSplatW(); Vector YEqualsZero = Y.EqualsZero(); Vector XEqualsZero = X.EqualsZero(); Vector XIsPositive = _mm_and_ps(X, SIMD::g_signMask); XIsPositive = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(XIsPositive), _mm_castps_si128(Vector::Zero))); Vector YEqualsInfinity = Y.EqualsInfinity(); Vector XEqualsInfinity = X.EqualsInfinity(); Vector YSign = _mm_and_ps(Y, SIMD::g_signMask); vPi = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vPi), _mm_castps_si128(YSign))); vPiOverTwo =
_mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vPiOverTwo), _mm_castps_si128(YSign)));
    vPiOverFour = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vPiOverFour), _mm_castps_si128(YSign)));
    vThreePiOverFour = _mm_castsi128_ps(_mm_or_si128(_mm_castps_si128(vThreePiOverFour), _mm_castps_si128(YSign)));

    Vector R1 = Vector::Select(vPi, YSign, XIsPositive);
    Vector R2 = Vector::Select(ATanResultValid, vPiOverTwo, XEqualsZero);
    Vector R3 = Vector::Select(R2, R1, YEqualsZero);
    Vector R4 = Vector::Select(vThreePiOverFour, vPiOverFour, XIsPositive);
    Vector R5 = Vector::Select(vPiOverTwo, R4, XEqualsInfinity);
    Vector Result = Vector::Select(R3, R5, YEqualsInfinity);
    ATanResultValid = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(Result), _mm_castps_si128(ATanResultValid)));

    Vector Reciprocal = X.GetInverseEst();
    Vector V = Y * Reciprocal;
    Vector R0 = Vector::ATanEst(V);
    R1 = Vector::Select(vPi, Vector(SIMD::g_signMask), XIsPositive);
    R2 = R0 + R1;
    Result = Vector::Select(Result, R2, ATanResultValid);
    return Result;
}

FORCE_INLINE void Vector::SinCos(Vector& sin, Vector& cos, float angle)
{
    return SinCos(sin, cos, Vector(angle));
}

FORCE_INLINE void Vector::SinCos(Vector& sin, Vector& cos, const Vector& angle)
{
    // Force the value within the bounds of pi
    auto m_x = Vector::AngleMod2Pi(angle);

    // Map in [-pi/2,pi/2] with sin(m_y) = sin(m_x), cos(m_y) = sign*cos(m_x).
    auto sign = _mm_and_ps(m_x, SIMD::g_signMask);
    __m128 c = _mm_or_ps(Vector::Pi, sign); // pi when m_x >= 0, -pi when m_x < 0
    __m128 absx = _mm_andnot_ps(sign, m_x); // |m_x|
    __m128 rflx = _mm_sub_ps(c, m_x);
    __m128 comp = _mm_cmple_ps(absx, Vector::PiDivTwo);
    __m128 select0 = _mm_and_ps(comp, m_x);
    __m128 select1 = _mm_andnot_ps(comp, rflx);
    m_x = _mm_or_ps(select0, select1);
    select0 = _mm_and_ps(comp, Vector::One);
    select1 = _mm_andnot_ps(comp, Vector::NegativeOne);
    sign = _mm_or_ps(select0, select1);

    __m128 x2 = _mm_mul_ps(m_x, m_x);

    // Compute polynomial approximation of sine
    const auto SC1 = SIMD::g_sinCoefficients1;
    auto vConstants = _mm_shuffle_ps(SC1, SC1, _MM_SHUFFLE(0, 0, 0, 0));
    __m128 Result = _mm_mul_ps(vConstants, x2);
    const auto SC0 = SIMD::g_sinCoefficients0;
    vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(3, 3, 3, 3));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(2, 2, 2, 2));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(1, 1, 1, 1));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    vConstants = _mm_shuffle_ps(SC0, SC0, _MM_SHUFFLE(0, 0, 0, 0));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    Result = _mm_add_ps(Result, Vector::One);
    Result = _mm_mul_ps(Result, m_x);
    sin = Result;

    // Compute polynomial approximation of cosine
    const auto CC1 = SIMD::g_cosCoefficients1;
    vConstants = _mm_shuffle_ps(CC1, CC1, _MM_SHUFFLE(0, 0, 0, 0));
    Result = _mm_mul_ps(vConstants, x2);
    const auto CC0 = SIMD::g_cosCoefficients0;
    vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(3, 3, 3, 3));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(2, 2, 2, 2));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(1, 1, 1, 1));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    vConstants = _mm_shuffle_ps(CC0, CC0, _MM_SHUFFLE(0, 0, 0, 0));
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    Result = _mm_add_ps(Result, Vector::One);
    Result = _mm_mul_ps(Result, sign);
    cos = Result;
}

FORCE_INLINE Vector Vector::AngleMod2Pi(const Vector& angles)
{
    // Modulo the range of the given angles such that -Pi <= Angles < Pi
    Vector result = _mm_mul_ps(angles, Vector::OneDivTwoPi);
    result.Round();
    result = _mm_mul_ps(result, Vector::TwoPi);
    result = _mm_sub_ps(angles, result);
    return result;
}

FORCE_INLINE Vector::operator __m128& () { return m_data; }
FORCE_INLINE Vector::operator const __m128& () const { return m_data; }

FORCE_INLINE Vector::Vector() { }

FORCE_INLINE Vector::Vector(Axis axis)
{
    switch (axis)
    {
        case Axis::X: *this = Vector::UnitX; break;
        case Axis::Y: *this = Vector::UnitY; break;
        case Axis::Z: *this = Vector::UnitZ; break;
        default: HALT(); break;
    }
}

FORCE_INLINE Vector::Vector(ZeroInit_t) { memset(this, 0, sizeof(Vector)); }
FORCE_INLINE Vector::Vector(float v) { m_data = _mm_set1_ps(v); }
FORCE_INLINE Vector::Vector(__m128 v) : m_data(v) { }
FORCE_INLINE Vector::Vector(float ix, float iy, float iz, float iw) { m_data = _mm_set_ps(iw, iz, iy, ix); }
FORCE_INLINE Vector::Vector(const Float2& v, float iz, float iw) { m_data = _mm_set_ps(iw, iz, v.m_y, v.m_x); }
FORCE_INLINE Vector::Vector(const Float3& v, float iw) { m_data = _mm_set_ps(iw, v.m_z, v.m_y, v.m_x); }
FORCE_INLINE Vector::Vector(const Float4& v) { m_data = _mm_loadu_ps(&v.m_x); }
FORCE_INLINE Vector::Vector(const float* pValues) { m_data = _mm_loadu_ps(pValues); }

FORCE_INLINE bool Vector::IsValid() const { return !IsNaN4() && !IsInfinite4(); }

FORCE_INLINE void Vector::Store(float* pValues) const { _mm_storeu_ps(pValues, m_data); }
FORCE_INLINE void Vector::StoreFloat(float& value) const { _mm_store_ss(&value, m_data); }

FORCE_INLINE void Vector::StoreFloat2(Float2& value) const
{
    auto yVec = _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1));
    _mm_store_ss(&value.m_x, m_data);
    _mm_store_ss(&value.m_y, yVec);
}

FORCE_INLINE void Vector::StoreFloat3(Float3& value) const
{
    auto yVec = _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1));
    auto zVec = _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2));
    _mm_store_ss(&value.m_x, m_data);
    _mm_store_ss(&value.m_y, yVec);
    _mm_store_ss(&value.m_z, zVec);
}

FORCE_INLINE void Vector::StoreFloat4(Float4& value) const { _mm_storeu_ps(&value.m_x, m_data); }

FORCE_INLINE float Vector::ToFloat() const { float v; StoreFloat(v); return v; }
FORCE_INLINE Float2 Vector::ToFloat2() const { Float2 v; StoreFloat2(v); return v; }
FORCE_INLINE Float3 Vector::ToFloat3() const { Float3 v; StoreFloat3(v); return v; }
FORCE_INLINE Float4 Vector::ToFloat4() const { Float4 v; StoreFloat4(v); return v; }

FORCE_INLINE Vector::operator Float2() const { return ToFloat2(); }
FORCE_INLINE Vector::operator Float3() const { return ToFloat3(); }
FORCE_INLINE Vector::operator Float4() const { return ToFloat4(); }

FORCE_INLINE float Vector::GetX() const { return _mm_cvtss_f32(m_data); }
FORCE_INLINE float Vector::GetY() const { auto vTemp = GetSplatY(); return _mm_cvtss_f32(vTemp); }
FORCE_INLINE float Vector::GetZ() const { auto vTemp = GetSplatZ(); return _mm_cvtss_f32(vTemp); }
FORCE_INLINE float Vector::GetW() const { auto vTemp = GetSplatW(); return _mm_cvtss_f32(vTemp); }

FORCE_INLINE void Vector::SetX(float x) { m_data = _mm_move_ss(m_data, _mm_set_ss(x)); }
FORCE_INLINE void Vector::SetY(float y) { m_data = _mm_insert_ps(m_data, _mm_set_ss(y), 0x10); }
FORCE_INLINE void Vector::SetZ(float z) { m_data = _mm_insert_ps(m_data, _mm_set_ss(z), 0x20); }
FORCE_INLINE void Vector::SetW(float w) { m_data = _mm_insert_ps(m_data, _mm_set_ss(w), 0x30); }

FORCE_INLINE float Vector::operator[](uint32_t i) const
{
    ASSERT(i < 4);
    switch (i)
    {
        case 0: return GetX(); break;
        case 1: return GetY(); break;
        case 2: return GetZ(); break;
        case 3: return GetW(); break;
    }
    UNREACHABLE_CODE();
    return 0.0f;
}

FORCE_INLINE bool Vector::IsW1() const { return GetSplatW().IsEqual4(Vector::One); }
FORCE_INLINE bool Vector::IsW0() const { return GetSplatW().IsZero4(); }
FORCE_INLINE Vector& Vector::SetW0() { SetW(0.0f); return *this; }
FORCE_INLINE Vector& Vector::SetW1() { SetW(1.0f); return *this; }
FORCE_INLINE Vector Vector::GetWithW0() const { Vector v = *this; v.SetW0(); return v; }
FORCE_INLINE Vector Vector::GetWithW1() const { Vector v = *this; v.SetW1(); return v; }
FORCE_INLINE Vector Vector::Get2D() const { return Vector::Select(*this, Vector::Zero, Vector::Select0011); }
FORCE_INLINE Vector Vector::Get3D() const { return Vector::Select(*this, Vector::Zero, Vector::Select0001); }

FORCE_INLINE Vector Vector::operator+(const Vector& v) const { return _mm_add_ps(m_data, v); }
FORCE_INLINE Vector& Vector::operator+=(const Vector& v) { m_data = _mm_add_ps(m_data, v); return *this; }
FORCE_INLINE Vector Vector::operator-(const Vector& v) const { return _mm_sub_ps(m_data, v); }
FORCE_INLINE Vector& Vector::operator-=(const Vector& v) { m_data = _mm_sub_ps(m_data, v); return *this; }
FORCE_INLINE Vector Vector::operator*(const Vector& v) const { return _mm_mul_ps(m_data, v); }
FORCE_INLINE Vector& Vector::operator*=(const Vector& v) { m_data = _mm_mul_ps(m_data, v); return *this; }
FORCE_INLINE Vector Vector::operator/(const Vector& v) const { return _mm_div_ps(m_data, v); }
FORCE_INLINE Vector& Vector::operator/=(const Vector& v) { m_data = _mm_div_ps(m_data, v); return *this; }
FORCE_INLINE Vector Vector::operator*(float const f) const { return operator*(Vector(f)); }
FORCE_INLINE Vector& Vector::operator*=(float const f) { return operator*=(Vector(f)); }
FORCE_INLINE Vector Vector::operator/(float const f) const { return operator/(Vector(f)); }
FORCE_INLINE Vector& Vector::operator/=(float const f) { return operator/=(Vector(f)); }
FORCE_INLINE Vector Vector::operator-() const { return GetNegated(); }

FORCE_INLINE Vector Vector::Orthogonal2D() const
{
    static Vector const negX(-1.0f, 1.0f, 1.0f, 1.0f);
    Vector result;
    result = _mm_shuffle_ps(*this, *this, _MM_SHUFFLE(3, 2, 0, 1));
    result = _mm_mul_ps(result, negX);
    return result;
}

FORCE_INLINE Vector Vector::Cross2(const Vector& other) const
{
    Vector vResult = _mm_shuffle_ps(other.m_data, other.m_data, _MM_SHUFFLE(0, 1, 0, 1));
    vResult = _mm_mul_ps(vResult, m_data);
    Vector vTemp = vResult.GetSplatY();
    vResult = _mm_sub_ss(vResult, vTemp);
    vResult = vResult.GetSplatX();
    return vResult;
}

FORCE_INLINE Vector Vector::Cross3(const Vector& other) const
{
    auto vTemp1 = _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 0, 2, 1));
    auto vTemp2 = _mm_shuffle_ps(other, other, _MM_SHUFFLE(3, 1, 0, 2));
    Vector result = _mm_mul_ps(vTemp1, vTemp2);
    vTemp1 = _mm_shuffle_ps(vTemp1, vTemp1, _MM_SHUFFLE(3, 0, 2, 1));
    vTemp2 = _mm_shuffle_ps(vTemp2, vTemp2, _MM_SHUFFLE(3, 1, 0, 2));
    vTemp1 = _mm_mul_ps(vTemp1, vTemp2);
    result = _mm_sub_ps(result, vTemp1);
    result = _mm_and_ps(result, SIMD::g_maskXYZ0);
    return result;
}

FORCE_INLINE Vector Vector::Dot2(const Vector& other) const
{
    // Perform the dot product on m_x and m_y
    Vector result = _mm_mul_ps(m_data, other);
    // vTemp has m_y splatted
    auto vTemp = _mm_shuffle_ps(result, result, _MM_SHUFFLE(1, 1, 1, 1));
    // m_x+m_y
    result = _mm_add_ss(result, vTemp);
    result = _mm_shuffle_ps(result, result, _MM_SHUFFLE(0, 0, 0, 0));
    return result;
}

FORCE_INLINE Vector Vector::Dot3(const Vector& vOther) const
{
    // Perform the dot product
    auto vDot = _mm_mul_ps(m_data, vOther);
    // m_x=Dot.vector4_f32[1], m_y=Dot.vector4_f32[2]
    auto vTemp = _mm_shuffle_ps(vDot, vDot, _MM_SHUFFLE(2, 1, 2, 1));
    // Result.vector4_f32[0] = m_x+m_y
    vDot = _mm_add_ss(vDot, vTemp);
    // m_x=Dot.vector4_f32[2]
    vTemp = _mm_shuffle_ps(vTemp, vTemp, _MM_SHUFFLE(1, 1, 1, 1));
    // Result.vector4_f32[0] = (m_x+m_y)+m_z
    vDot = _mm_add_ss(vDot, vTemp);
    // Splat m_x
    Vector result = _mm_shuffle_ps(vDot, vDot, _MM_SHUFFLE(0, 0, 0, 0));
    return result;
}

FORCE_INLINE Vector Vector::Dot4(const Vector& other) const
{
    auto vTemp2 = other;
    auto vTemp = _mm_mul_ps(m_data, vTemp2);
    vTemp2 = _mm_shuffle_ps(vTemp2, vTemp, _MM_SHUFFLE(1, 0, 0, 0)); // Copy X to the Z position and Y to the W position
    vTemp2 = _mm_add_ps(vTemp2, vTemp);                              // Add Z = X+Z; W = Y+W;
    vTemp = _mm_shuffle_ps(vTemp, vTemp2, _MM_SHUFFLE(0, 3, 0, 0));  // Copy W to the Z position
    vTemp = _mm_add_ps(vTemp, vTemp2);                               // Add Z and W together
    return _mm_shuffle_ps(vTemp, vTemp, _MM_SHUFFLE(2, 2, 2, 2));    // Splat Z and return
}

FORCE_INLINE float Vector::GetDot2(const Vector& other) const { return Dot2(other).ToFloat(); }
FORCE_INLINE float Vector::GetDot3(const Vector& other) const { return Dot3(other).ToFloat(); }
FORCE_INLINE float Vector::GetDot4(const Vector& other) const { return Dot4(other).ToFloat(); }

FORCE_INLINE Vector Vector::ScalarProjection(const Vector& other) const
{
    Vector const normalizedThis = GetNormalized3();
    Vector const projection = other.Dot3(normalizedThis);
    return projection;
}

FORCE_INLINE float Vector::GetScalarProjection(const Vector& other) const { return ScalarProjection(other).ToFloat(); }

FORCE_INLINE Vector Vector::VectorProjection(const Vector& other) const
{
    Vector const normalizedThis = GetNormalized3();
    Vector const dotOther = other.Dot3(normalizedThis);
    Vector const projection = normalizedThis * dotOther;
    return projection;
}

FORCE_INLINE Vector& Vector::Invert() { m_data = _mm_div_ps(Vector::One, m_data); return *this; }
FORCE_INLINE Vector Vector::GetInverse() const { return _mm_div_ps(Vector::One, m_data); }
FORCE_INLINE Vector Vector::GetReciprocal() const { return GetInverse(); }
FORCE_INLINE Vector& Vector::InvertEst() { m_data = _mm_rcp_ps(m_data); return *this; }
FORCE_INLINE Vector Vector::GetInverseEst() const { return _mm_rcp_ps(m_data); }
FORCE_INLINE Vector& Vector::Negate() { m_data = _mm_sub_ps(Vector::Zero, m_data); return *this; }
FORCE_INLINE Vector Vector::GetNegated() const { return _mm_sub_ps(Vector::Zero, m_data); }
FORCE_INLINE Vector& Vector::Abs() { m_data = _mm_max_ps(_mm_sub_ps(Vector::Zero, m_data), m_data); return *this; }
FORCE_INLINE Vector Vector::GetAbs() const { return _mm_max_ps(_mm_sub_ps(Vector::Zero, m_data), m_data); }
FORCE_INLINE Vector& Vector::Sqrt() { m_data = _mm_sqrt_ps(m_data); return *this; }
FORCE_INLINE Vector Vector::GetSqrt() { return _mm_sqrt_ps(m_data); }
FORCE_INLINE Vector& Vector::ReciprocalSqrt() { m_data = _mm_div_ps(Vector::One, _mm_sqrt_ps(m_data)); return *this; }
FORCE_INLINE Vector Vector::GetReciprocalSqrt() { return _mm_div_ps(Vector::One, _mm_sqrt_ps(m_data)); }
FORCE_INLINE Vector& Vector::EstimatedReciprocalSqrt() { m_data = _mm_rsqrt_ps(m_data); return *this; }
FORCE_INLINE Vector Vector::GetEstimatedReciprocalSqrt() { return _mm_rsqrt_ps(m_data); }

FORCE_INLINE Vector& Vector::Normalize2()
{
    // Perform the dot product on m_x and m_y only
    auto vLengthSq = _mm_mul_ps(m_data, m_data);
    auto vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 1, 1, 1));
    vLengthSq = _mm_add_ss(vLengthSq, vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(0, 0, 0, 0));
    // Prepare for the division
    auto vResult = _mm_sqrt_ps(vLengthSq);
    // Create zero with a single instruction
    auto vZeroMask = _mm_setzero_ps();
    // Test for a divide by zero (Must be FP to detect -0.0)
    vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq, Vector::Infinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(m_data, vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult, vZeroMask);
    // Select qnan or result based on infinite length
    auto vTemp1 = _mm_andnot_ps(vLengthSq, Vector::QNaN);
    auto vTemp2 = _mm_and_ps(vResult, vLengthSq);
    m_data = _mm_or_ps(vTemp1, vTemp2);
    *this = Select(*this, Vector::Zero, Select0011);
    return *this;
}

FORCE_INLINE Vector& Vector::Normalize3()
{
    // Perform the dot product on m_x,m_y and m_z only
    auto vLengthSq = _mm_mul_ps(m_data, m_data);
    auto vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(2, 1, 2, 1));
    vLengthSq = _mm_add_ss(vLengthSq, vTemp);
    vTemp = _mm_shuffle_ps(vTemp, vTemp, _MM_SHUFFLE(1, 1, 1, 1));
    vLengthSq = _mm_add_ss(vLengthSq, vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(0, 0, 0, 0));
    // Prepare for the division
    auto vResult = _mm_sqrt_ps(vLengthSq);
    // Create zero with a single instruction
    auto vZeroMask = _mm_setzero_ps();
    // Test for a divide by zero (Must be FP to detect -0.0)
    vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq, Vector::Infinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(m_data, vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult, vZeroMask);
    // Select qnan or result based on infinite length
    auto vTemp1 = _mm_andnot_ps(vLengthSq, Vector::QNaN);
    auto vTemp2 = _mm_and_ps(vResult, vLengthSq);
    m_data = _mm_or_ps(vTemp1, vTemp2);
    *this = Select(*this, Vector::Zero, Select0001);
    return *this;
}

FORCE_INLINE Vector& Vector::Normalize4()
{
    // Perform the dot product on m_x,m_y,m_z and m_w
    auto vLengthSq = _mm_mul_ps(m_data, m_data);
    // vTemp has m_z and m_w
    auto vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(3, 2, 3, 2));
    // m_x+m_z, m_y+m_w
    vLengthSq = _mm_add_ps(vLengthSq, vTemp);
    // m_x+m_z,m_x+m_z,m_x+m_z,m_y+m_w
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 0, 0, 0));
    // ??,??,m_y+m_w,m_y+m_w
    vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0));
    // ??,??,m_x+m_z+m_y+m_w,??
    vLengthSq = _mm_add_ps(vLengthSq, vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(2, 2, 2, 2));
    // Prepare for the division
    auto vResult = _mm_sqrt_ps(vLengthSq);
    // Create zero with a single instruction
    auto vZeroMask = _mm_setzero_ps();
    // Test for a divide by zero (Must be FP to detect -0.0)
    vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq, Vector::Infinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(m_data, vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult, vZeroMask);
    // Select qnan or result based on infinite length
    auto vTemp1 = _mm_andnot_ps(vLengthSq, Vector::QNaN);
    auto vTemp2 = _mm_and_ps(vResult, vLengthSq);
    m_data = _mm_or_ps(vTemp1, vTemp2);
    return *this;
}

FORCE_INLINE Vector Vector::GetNormalized2() const { Vector v = *this; v.Normalize2(); return v; }
FORCE_INLINE Vector Vector::GetNormalized3() const { Vector v = *this; v.Normalize3(); return v; }
FORCE_INLINE Vector Vector::GetNormalized4() const { Vector v = *this; v.Normalize4(); return v; }

FORCE_INLINE Vector& Vector::Floor()
{
    Vector result;
    // To handle NAN, INF and numbers greater than 8388608, use masking
    __m128i vTest = _mm_and_si128(_mm_castps_si128(m_data), SIMD::g_absMask);
    vTest = _mm_cmplt_epi32(vTest, SIMD::g_noFraction);
    // Truncate
    __m128i vInt = _mm_cvttps_epi32(m_data);
    result = _mm_cvtepi32_ps(vInt);
    __m128 vLarger = _mm_cmpgt_ps(result, m_data);
    // 0 -> 0, 0xffffffff -> -1.0f
    vLarger = _mm_cvtepi32_ps(_mm_castps_si128(vLarger));
    result = _mm_add_ps(result, vLarger);
    // All numbers less than 8388608 will use the round to int
    result = _mm_and_ps(result, _mm_castsi128_ps(vTest));
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest, _mm_castps_si128(m_data));
    result = _mm_or_ps(result, _mm_castsi128_ps(vTest));
    m_data = result;
    return *this;
}

FORCE_INLINE Vector Vector::GetFloor() const { Vector v = *this; v.Floor(); return v; }

FORCE_INLINE Vector& Vector::Ceil()
{
    Vector result;
    // To handle NAN, INF and numbers greater than 8388608, use masking
    __m128i vTest = _mm_and_si128(_mm_castps_si128(m_data), SIMD::g_absMask);
    vTest = _mm_cmplt_epi32(vTest, SIMD::g_noFraction);
    // Truncate
    __m128i vInt = _mm_cvttps_epi32(m_data);
    result = _mm_cvtepi32_ps(vInt);
    __m128 vSmaller = _mm_cmplt_ps(result, m_data);
    // 0 -> 0, 0xffffffff -> -1.0f
    vSmaller = _mm_cvtepi32_ps(_mm_castps_si128(vSmaller));
    result = _mm_sub_ps(result, vSmaller);
    // All numbers less than 8388608 will use the round to int
    result = _mm_and_ps(result, _mm_castsi128_ps(vTest));
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest, _mm_castps_si128(m_data));
    result = _mm_or_ps(result, _mm_castsi128_ps(vTest));
    m_data = result;
    return *this;
}

FORCE_INLINE Vector Vector::GetCeil() const { Vector v = *this; v.Ceil(); return v; }

FORCE_INLINE Vector& Vector::Round()
{
    __m128 sign = _mm_and_ps(m_data, SIMD::g_signMask);
    __m128 sMagic = _mm_or_ps(SIMD::g_noFraction, sign);
    __m128 R1 = _mm_add_ps(m_data, sMagic);
    R1 = _mm_sub_ps(R1, sMagic);
    __m128 R2 = _mm_and_ps(m_data, SIMD::g_absMask);
    __m128 mask = _mm_cmple_ps(R2, SIMD::g_noFraction);
    R2 = _mm_andnot_ps(mask, m_data);
    R1 = _mm_and_ps(R1, mask);
    m_data = _mm_xor_ps(R1, R2);
    return *this;
}

FORCE_INLINE Vector Vector::GetRound() const { Vector v = *this; v.Round(); return v; }
FORCE_INLINE Vector Vector::GetSign() const
{
    Vector const selectMask = GreaterThanEqual(Vector::Zero);
    return Vector::Select(Vector::NegativeOne, Vector::One, selectMask);
}

FORCE_INLINE Vector Vector::GetSplatX() const { return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(0, 0, 0, 0)); }
FORCE_INLINE Vector Vector::GetSplatY() const { return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1)); }
FORCE_INLINE Vector Vector::GetSplatZ() const { return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2)); }
FORCE_INLINE Vector Vector::GetSplatW() const { return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3)); }

template <uint32_t xIdx, uint32_t yIdx, uint32_t zIdx, uint32_t wIdx>
FORCE_INLINE Vector Vector::Swizzle() const
{
    static_assert(xIdx < 4, "Element index parameter out of range");
    static_assert(yIdx < 4, "Element index parameter out of range");
    static_assert(zIdx < 4, "Element index parameter out of range");
    static_assert(wIdx < 4, "Element index parameter out of range");
    return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(wIdx, zIdx, yIdx, xIdx));
}

FORCE_INLINE Vector Vector::Swizzle(uint32_t xIdx, uint32_t yIdx, uint32_t zIdx, uint32_t wIdx) const
{
    ASSERT(xIdx < 4 && yIdx < 4 && zIdx < 4 && wIdx < 4);
    uint32_t const elem[4] = { xIdx, yIdx, zIdx, wIdx };
    __m128i vControl = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&elem[0]));
    return _mm_permutevar_ps(m_data, vControl);
}

FORCE_INLINE Vector Vector::Shuffle(uint32_t xIdx, uint32_t yIdx, uint32_t zIdx, uint32_t wIdx) const
{
    return Swizzle(xIdx, yIdx, zIdx, wIdx);
}

template <uint32_t xIdx, uint32_t yIdx, uint32_t zIdx, uint32_t wIdx>
FORCE_INLINE Vector Vector::Shuffle() const
{
    return Swizzle<xIdx, yIdx, zIdx, wIdx>();
}

FORCE_INLINE Vector Vector::Length2() const
{
    Vector result;
    result = _mm_mul_ps(m_data, m_data);
    auto vTemp = _mm_shuffle_ps(result, result, _MM_SHUFFLE(1, 1, 1, 1));
    // m_x+m_y
    result = _mm_add_ss(result, vTemp);
    result = _mm_shuffle_ps(result, result, _MM_SHUFFLE(0, 0, 0, 0));
    result = _mm_sqrt_ps(result);
    return result;
}

FORCE_INLINE Vector Vector::Length3() const
{
    Vector result;
    // Perform the dot product on m_x,m_y and m_z
    result = _mm_mul_ps(m_data, m_data);
    // vTemp has m_z and m_y
    auto vTemp = _mm_shuffle_ps(result, result, _MM_SHUFFLE(1, 2, 1, 2));
    // m_x+m_z, m_y
    result = _mm_add_ss(result, vTemp);
    // m_y,m_y,m_y,m_y
    vTemp = _mm_shuffle_ps(vTemp, vTemp, _MM_SHUFFLE(1, 1, 1, 1));
    // m_x+m_z+m_y,??,??,??
    result = _mm_add_ss(result, vTemp);
    // Splat the length squared
    result = _mm_shuffle_ps(result, result, _MM_SHUFFLE(0, 0, 0, 0));
    // Get the length
    result = _mm_sqrt_ps(result);
    return result;
}

FORCE_INLINE Vector Vector::Length4() const
{
    Vector result;
    // Perform the dot product on m_x,m_y,m_z and m_w
    result = _mm_mul_ps(m_data, m_data);
    // vTemp has m_z and m_w
    auto vTemp = _mm_shuffle_ps(result, result, _MM_SHUFFLE(3, 2, 3, 2));
    // m_x+m_z, m_y+m_w
    result = _mm_add_ps(result, vTemp);
    // m_x+m_z,m_x+m_z,m_x+m_z,m_y+m_w
    result = _mm_shuffle_ps(result, result, _MM_SHUFFLE(1, 0, 0, 0));
    // ??,??,m_y+m_w,m_y+m_w
    vTemp = _mm_shuffle_ps(vTemp, result, _MM_SHUFFLE(3, 3, 0, 0));
    // ??,??,m_x+m_z+m_y+m_w,??
    result = _mm_add_ps(result, vTemp);
    // Splat the length
    result = _mm_shuffle_ps(result, result, _MM_SHUFFLE(2, 2, 2, 2));
    // Get the length
    result = _mm_sqrt_ps(result);
    return result;
}

FORCE_INLINE float Vector::GetLength2() const { return Length2().GetX(); }
FORCE_INLINE float Vector::GetLength3() const { return Length3().GetX(); }
FORCE_INLINE float Vector::GetLength4() const { return Length4().GetX(); }

FORCE_INLINE Vector Vector::InverseLength2() const
{
    // Perform the dot product on m_x and m_y
    auto vLengthSq = _mm_mul_ps(m_data, m_data);
    // vTemp has m_y splatted
    auto vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 1, 1, 1));
    // m_x+m_y
    vLengthSq = _mm_add_ss(vLengthSq, vTemp);
    vLengthSq = _mm_sqrt_ss(vLengthSq);
    vLengthSq = _mm_div_ss(Vector::One, vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(0, 0, 0, 0));
    return vLengthSq;
}

FORCE_INLINE Vector Vector::InverseLength3() const
{
    // Perform the dot product
    auto vDot = _mm_mul_ps(m_data, m_data);
    // m_x=Dot.m_y, m_y=Dot.m_z
    auto vTemp = _mm_shuffle_ps(vDot, vDot, _MM_SHUFFLE(2, 1, 2, 1));
    // Result.m_x = m_x+m_y
    vDot = _mm_add_ss(vDot, vTemp);
    // m_x=Dot.m_z
    vTemp = _mm_shuffle_ps(vTemp, vTemp, _MM_SHUFFLE(1, 1, 1, 1));
    // Result.m_x = (m_x+m_y)+m_z
    vDot = _mm_add_ss(vDot, vTemp);
    // Splat m_x
    vDot = _mm_shuffle_ps(vDot, vDot, _MM_SHUFFLE(0, 0, 0, 0));
    // Get the reciprocal
    vDot = _mm_sqrt_ps(vDot);
    // Get the reciprocal
    vDot = _mm_div_ps(Vector::One, vDot);
    return vDot;
}

FORCE_INLINE Vector Vector::InverseLength4() const
{
    // Perform the dot product on m_x,m_y,m_z and m_w
    auto vLengthSq = _mm_mul_ps(m_data, m_data);
    // vTemp has m_z and m_w
    auto vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(3, 2, 3, 2));
    // m_x+m_z, m_y+m_w
    vLengthSq = _mm_add_ps(vLengthSq, vTemp);
    // m_x+m_z,m_x+m_z,m_x+m_z,m_y+m_w
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 0, 0, 0));
    // ??,??,m_y+m_w,m_y+m_w
    vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0));
    // ??,??,m_x+m_z+m_y+m_w,??
    vLengthSq = _mm_add_ps(vLengthSq, vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(2, 2, 2, 2));
    // Get the reciprocal
    vLengthSq = _mm_sqrt_ps(vLengthSq); // Accurate!
    vLengthSq = _mm_div_ps(Vector::One, vLengthSq);
    return vLengthSq;
}

FORCE_INLINE float Vector::GetInverseLength2() const { return InverseLength2().GetX(); }
FORCE_INLINE float Vector::GetInverseLength3() const { return InverseLength3().GetX(); }
FORCE_INLINE float Vector::GetInverseLength4() const { return InverseLength4().GetX(); }

FORCE_INLINE Vector Vector::LengthSquared2() const { return Vector::Dot2(m_data, m_data); }
FORCE_INLINE Vector Vector::LengthSquared3() const { return Vector::Dot3(m_data, m_data); }
FORCE_INLINE Vector Vector::LengthSquared4() const { return Vector::Dot4(m_data, m_data); }

FORCE_INLINE float Vector::GetLengthSquared2() const { return LengthSquared2().GetX(); }
FORCE_INLINE float Vector::GetLengthSquared3() const { return LengthSquared3().GetX(); }
FORCE_INLINE float Vector::GetLengthSquared4() const { return LengthSquared4().GetX(); }

FORCE_INLINE Vector Vector::Distance2(const Vector& to) const { return (to - *this).Length2(); }
FORCE_INLINE Vector Vector::Distance3(const Vector& to) const { return (to - *this).Length3(); }
FORCE_INLINE Vector Vector::Distance4(const Vector& to) const { return (to - *this).Length4(); }

FORCE_INLINE float Vector::GetDistance2(const Vector& to) const { return (to - *this).Length2().GetX(); }
FORCE_INLINE float Vector::GetDistance3(const Vector& to) const { return (to - *this).Length3().GetX(); }
FORCE_INLINE float Vector::GetDistance4(const Vector& to) const { return (to - *this).Length4().GetX(); }

FORCE_INLINE Vector Vector::DistanceSquared2(const Vector& to) const { return (to - *this).LengthSquared2(); }
FORCE_INLINE Vector Vector::DistanceSquared3(const Vector& to) const { return (to - *this).LengthSquared3(); }
FORCE_INLINE Vector Vector::DistanceSquared4(const Vector& to) const { return (to - *this).LengthSquared4(); }

FORCE_INLINE float Vector::GetDistanceSquared2(const Vector& to) const { return (to - *this).GetLengthSquared2(); }
FORCE_INLINE float Vector::GetDistanceSquared3(const Vector& to) const { return (to - *this).GetLengthSquared3(); }
FORCE_INLINE float Vector::GetDistanceSquared4(const Vector& to) const { return (to - *this).GetLengthSquared4(); }

FORCE_INLINE bool Vector::IsNormalized2() const { return (LengthSquared2() - Vector::One).Abs().IsLessThanEqual4(Vector::NormalizeCheckThreshold); }
FORCE_INLINE bool Vector::IsNormalized3() const { return (LengthSquared3() - Vector::One).Abs().IsLessThanEqual4(Vector::NormalizeCheckThreshold); }
FORCE_INLINE bool Vector::IsNormalized4() const { return (LengthSquared4() - Vector::One).Abs().IsLessThanEqual4(Vector::NormalizeCheckThreshold); }

FORCE_INLINE Vector Vector::InBounds(const Vector& bounds) const
{
    // Test if less than or equal
    auto vTemp1 = _mm_cmple_ps(m_data, bounds);
    // Negate the bounds
    auto vTemp2 = _mm_mul_ps(bounds, Vector::NegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2, m_data);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1, vTemp2);
    return vTemp1;
}

FORCE_INLINE bool Vector::IsInBounds2(const Vector& bounds) const { return ((_mm_movemask_ps(InBounds(bounds)) & 0x3) == 0x3) != 0; }
FORCE_INLINE bool Vector::IsInBounds3(const Vector& bounds) const { return ((_mm_movemask_ps(InBounds(bounds)) & 0x7) == 0x7) != 0; }
FORCE_INLINE bool Vector::IsInBounds4(const Vector& bounds) const { return (_mm_movemask_ps(InBounds(bounds)) == 0x0f) != 0; }

FORCE_INLINE Vector Vector::Equal(const Vector& v) const { return _mm_cmpeq_ps(*this, v); }
FORCE_INLINE bool Vector::IsEqual2(const Vector& v) const { return (((_mm_movemask_ps(Equal(v)) & 3) == 3) != 0); }
FORCE_INLINE bool Vector::IsEqual3(const Vector& v) const { return (((_mm_movemask_ps(Equal(v)) & 7) == 7) != 0); }
FORCE_INLINE bool Vector::IsEqual4(const Vector& v) const { return ((_mm_movemask_ps(Equal(v)) == 0x0f) != 0); }

FORCE_INLINE Vector Vector::NearEqual(const Vector& v, const Vector& epsilon) const
{
    // Get the difference
    auto vDelta = _mm_sub_ps(m_data, v);
    // Get the absolute value of the difference
    auto vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp, vDelta);
    vTemp = _mm_max_ps(vTemp, vDelta);
    vTemp = _mm_cmple_ps(vTemp, epsilon);
    return vTemp;
}

FORCE_INLINE bool Vector::IsNearEqual2(const Vector& v, float epsilon) const { return IsNearEqual2(v, Vector(epsilon)); }
FORCE_INLINE bool Vector::IsNearEqual3(const Vector& v, float epsilon) const { return IsNearEqual3(v, Vector(epsilon)); }
FORCE_INLINE bool Vector::IsNearEqual4(const Vector& v, float epsilon) const { return IsNearEqual4(v, Vector(epsilon)); }
FORCE_INLINE bool Vector::IsNearEqual2(const Vector& v, const Vector& epsilon) const { return (((_mm_movemask_ps(NearEqual(v, epsilon)) & 3) == 0x3) != 0); }
FORCE_INLINE bool Vector::IsNearEqual3(const Vector& v, const Vector& epsilon) const { return (((_mm_movemask_ps(NearEqual(v, epsilon)) & 7) == 0x7) != 0); }
FORCE_INLINE bool Vector::IsNearEqual4(const Vector& v, const Vector& epsilon) const { return ((_mm_movemask_ps(NearEqual(v, epsilon)) == 0xf) != 0); }

FORCE_INLINE Vector Vector::GreaterThan(const Vector& v) const { return _mm_cmpgt_ps(m_data, v); }
FORCE_INLINE bool Vector::IsAnyGreaterThan(const Vector& v) const { return !GreaterThan(v).IsZero4(); }
FORCE_INLINE bool Vector::IsGreaterThan2(const Vector& v) const { return (((_mm_movemask_ps(GreaterThan(v)) & 3) == 3) != 0); }
FORCE_INLINE bool Vector::IsGreaterThan3(const Vector& v) const { return (((_mm_movemask_ps(GreaterThan(v)) & 7) == 7) != 0); }
FORCE_INLINE bool Vector::IsGreaterThan4(const Vector& v) const { return ((_mm_movemask_ps(GreaterThan(v)) == 0x0f) != 0); }

FORCE_INLINE Vector Vector::GreaterThanEqual(const Vector& v) const { return _mm_cmpge_ps(m_data, v); }
FORCE_INLINE bool Vector::IsAnyGreaterThanEqual(const Vector& v) const { return !GreaterThanEqual(v).IsZero4(); }
FORCE_INLINE bool Vector::IsGreaterThanEqual2(const Vector& v) const { return ((_mm_movemask_ps(GreaterThanEqual(v)) & 3) == 3) != 0; }
FORCE_INLINE bool Vector::IsGreaterThanEqual3(const Vector& v) const { return ((_mm_movemask_ps(GreaterThanEqual(v)) & 7) == 7) != 0; }
FORCE_INLINE bool Vector::IsGreaterThanEqual4(const Vector& v) const { return (_mm_movemask_ps(GreaterThanEqual(v)) == 0x0f) != 0; }

FORCE_INLINE Vector Vector::LessThan(const Vector& v) const { return _mm_cmplt_ps(m_data, v); }
FORCE_INLINE bool Vector::IsAnyLessThan(const Vector& v) const { return !LessThan(v).IsZero4(); }
FORCE_INLINE bool Vector::IsLessThan2(const Vector& v) const { return (((_mm_movemask_ps(LessThan(v)) & 3) == 3) != 0); }
FORCE_INLINE bool Vector::IsLessThan3(const Vector& v) const { return (((_mm_movemask_ps(LessThan(v)) & 7) == 7) != 0); }
FORCE_INLINE bool Vector::IsLessThan4(const Vector& v) const { return ((_mm_movemask_ps(LessThan(v)) == 0x0f) != 0); }

FORCE_INLINE Vector Vector::LessThanEqual(const Vector& v) const { return _mm_cmple_ps(m_data, v); }
FORCE_INLINE bool Vector::IsAnyLessThanEqual(const Vector& v) const { return !LessThanEqual(v).IsZero4(); }
FORCE_INLINE bool Vector::IsLessThanEqual2(const Vector& v) const { return (((_mm_movemask_ps(LessThanEqual(v)) & 3) == 3) != 0); }
FORCE_INLINE bool Vector::IsLessThanEqual3(const Vector& v) const { return (((_mm_movemask_ps(LessThanEqual(v)) & 7) == 7) != 0); }
FORCE_INLINE bool Vector::IsLessThanEqual4(const Vector& v) const { return ((_mm_movemask_ps(LessThanEqual(v)) == 0x0f) != 0); }

FORCE_INLINE Vector Vector::EqualsZero() const { return Equal(Vector::Zero); }
FORCE_INLINE bool Vector::IsAnyEqualToZero2() const { return !EqualsZero().IsZero2(); }
FORCE_INLINE bool Vector::IsAnyEqualToZero3() const { return !EqualsZero().IsZero3(); }
FORCE_INLINE bool Vector::IsAnyEqualToZero4() const { return !EqualsZero().IsZero4(); }

FORCE_INLINE bool Vector::IsZero2() const { return IsEqual2(Vector::Zero); }
FORCE_INLINE bool Vector::IsZero3() const { return IsEqual3(Vector::Zero); }
FORCE_INLINE bool Vector::IsZero4() const { return IsEqual4(Vector::Zero); }

FORCE_INLINE Vector Vector::NearEqualsZero(float epsilon) const { return NearEqual(Vector::Zero, Vector(epsilon)); }
FORCE_INLINE bool Vector::IsNearZero2(float epsilon) const { return IsNearEqual2(Vector::Zero, Vector(epsilon)); }
FORCE_INLINE bool Vector::IsNearZero3(float epsilon) const { return IsNearEqual3(Vector::Zero, Vector(epsilon)); }
FORCE_INLINE bool Vector::IsNearZero4(float epsilon) const { return IsNearEqual4(Vector::Zero, Vector(epsilon)); }

FORCE_INLINE Vector Vector::EqualsInfinity() const
{
    __m128 vTemp = _mm_and_ps(m_data, SIMD::g_absMask);
    return _mm_cmpeq_ps(vTemp, Vector::Infinity);
}

FORCE_INLINE bool Vector::IsInfinite2() const { return (_mm_movemask_ps(EqualsInfinity()) & 3) != 0; }
FORCE_INLINE bool Vector::IsInfinite3() const { return (_mm_movemask_ps(EqualsInfinity()) & 7) != 0; }
FORCE_INLINE bool Vector::IsInfinite4() const { return (_mm_movemask_ps(EqualsInfinity()) != 0); }

FORCE_INLINE Vector Vector::EqualsNaN() const { return _mm_cmpneq_ps(m_data, m_data); }
FORCE_INLINE bool Vector::IsNaN2() const { return (_mm_movemask_ps(EqualsNaN()) & 3) != 0; }
FORCE_INLINE bool Vector::IsNaN3() const { return (_mm_movemask_ps(EqualsNaN()) & 7) != 0; }
FORCE_INLINE bool Vector::IsNaN4() const { return (_mm_movemask_ps(EqualsNaN()) != 0); }

FORCE_INLINE bool Vector::IsParallelTo(const Vector& v) const
{
    Vector const vAbsDot = Vector::Dot3(*this, v).GetAbs();
    Vector const vAbsDelta = Vector::One - vAbsDot;
    return vAbsDelta.IsLessThanEqual4(Vector::Epsilon);
}

FORCE_INLINE void Vector::ToDirectionAndLength2(Vector& direction, float& length) const
{
    Vector const vLength = Length2();
    direction = Vector::Select(*this, Vector::Zero, Select0011);
    direction /= vLength;
    length = vLength.ToFloat();
}

FORCE_INLINE void Vector::ToDirectionAndLength3(Vector& direction, float& length) const
{
    Vector const vLength = Length3();
    direction = Vector::Select(*this, Vector::Zero, Select0001);
    direction /= vLength;
    length = vLength.ToFloat();
}

FORCE_INLINE bool Vector::operator==(const Vector& rhs) const { return IsEqual4(rhs); }
FORCE_INLINE bool Vector::operator!=(const Vector& rhs) const { return !IsEqual4(rhs); }
}

================================================
FILE: MotionCorrection/src/cpp/Platform.h
================================================
/*
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
#pragma once

// Finds the current platform
#if defined( __WIN32__ ) || defined( _WIN32 )
#   define PLATFORM_WIN32
#else
#   define PLATFORM_LINUX
#endif

//
// Platform Specific Helpers/Functions
//

// DLL export
#if defined(PLATFORM_WIN32)
// Windows
#   if defined(COMPILER_MSVC)
#       if defined(STATIC_LIB)
#           define API
#       else
#           if defined(API)
#               define API __declspec(dllexport)
#           else
#               define API __declspec(dllimport)
#           endif
#       endif
#   else
#       if defined(STATIC_LIB)
#           define API
#       else
#           if defined(API)
#               define API __attribute__ ((dllexport))
#           else
#               define API __attribute__ ((dllimport))
#           endif
#       endif
#   endif
#   define DISABLE_OPTIMIZATION __pragma( optimize( "", off ) )
#   define ENABLE_OPTIMIZATION __pragma( optimize( "", on ) )
#   define DEBUG_BREAK() // __debugbreak()
#else
// Linux settings
#   include <signal.h>
#   define API __attribute__ ((visibility ("default")))
#   define DISABLE_OPTIMIZATION
#   define ENABLE_OPTIMIZATION
#   define DEBUG_BREAK() // raise(SIGTRAP)
#endif

================================================
FILE: README.md
================================================


## Overview

Kimodo is a **ki**nematic **mo**tion **d**iffusi**o**n model trained on a large-scale (700 hours) commercially-friendly optical motion capture dataset. The model generates high-quality 3D human and robot motions, and is controlled through text prompts and an extensive set of constraints such as full-body pose keyframes, end-effector positions/rotations, 2D paths, and 2D waypoints. Full details of the model architecture and training are available in the [technical report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf).

This repository provides:

- **Inference**: code and CLI to generate motions on both human and robot skeletons
- **Interactive Demo**: easily author motions with a timeline interface of text prompts and kinematic controls
- **Benchmark**: [test cases](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark) and evaluation code built on the [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) dataset to evaluate motion generation models on their text- and constraint-following abilities
- **Annotations**: fine-grained temporal text descriptions created for the Kimodo project, included in the [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) dataset. For more information on these labels, see our separate [Hugging Face repo](https://huggingface.co/datasets/nvidia/SEED-Timeline-Annotations).
## News

See the [full changelog](CHANGELOG.md) for a detailed list of all changes.

- **[2026-05-03]** _FIX_: fixed a bug causing incorrect calculation of averaged metrics for constraint test cases in the benchmark
- **[2026-04-24]** _NEW_: improved multi-prompt generation and better support for small-VRAM GPUs via the `TEXT_ENCODER_DEVICE=cpu` env var
- **[2026-04-10]** Released the [Kimodo Motion Generation Benchmark](#kimodo-motion-generation-benchmark) alongside new v1.1 Kimodo-SOMA models
- **[2026-03-19]** **Breaking:** Model inputs/outputs now use the SOMA 77-joint skeleton (`somaskel77`).
- **[2026-03-16]** Initial open-source release of Kimodo with five model variants (SOMA, G1, SMPL-X), CLI, interactive demo, and timeline annotations for BONES-SEED.

## Kimodo Models

Several variations of Kimodo are available, trained on different skeletons and datasets. All models support text-to-motion and kinematic controls.

> Note: models are downloaded automatically when generating from the CLI or Interactive Demo, so there is no need to download them manually.

| Model | Skeleton | Training Data | Release Date | Hugging Face | License |
|:-------|:-------------|:------:|:------:|:-------------:|:-------------:|
| **Kimodo-SOMA-RP-v1.1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | April 10, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-RP-v1.1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) |
| **Kimodo-SOMA-SEED-v1.1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) | April 10, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-SEED-v1.1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) |
| **Kimodo-SOMA-RP-v1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-RP-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) |
| **Kimodo-G1-RP-v1** | [Unitree G1](https://github.com/unitreerobotics/unitree_mujoco/tree/main/unitree_robots/g1) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-G1-RP-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) |
| **Kimodo-SOMA-SEED-v1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-SEED-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) |
| **Kimodo-G1-SEED-v1** | [Unitree G1](https://github.com/unitreerobotics/unitree_mujoco/tree/main/unitree_robots/g1) | [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-G1-SEED-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) |
| **Kimodo-SMPLX-RP-v1** | [SMPL-X](https://github.com/vchoutas/smplx) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SMPLX-RP-v1) | [NVIDIA R&D Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-internal-scientific-research-and-development-model-license/) |
By default, we recommend using the models trained on the full Bones Rigplay 1 dataset (700 hours of mocap) for your motion generation needs. The models trained on BONES-SEED use only 288 hours of [publicly available mocap data](https://huggingface.co/datasets/bones-studio/seed), so they are less capable, but they are useful for comparison against other models trained on BONES-SEED. To easily compare motion generation models to Kimodo, check out our [Motion Generation Benchmark](#kimodo-motion-generation-benchmark).

### Changes in v1.1

The latest v1.1 Kimodo-SOMA models were released primarily for compatibility with our new [Motion Generation Benchmark](#kimodo-motion-generation-benchmark), but they also contain minor quality improvements over v1. For details on these improvements, please see the Hugging Face pages for [Kimodo-SOMA-RP-v1.1](https://huggingface.co/nvidia/Kimodo-SOMA-RP-v1.1#changes-in-v11) and [Kimodo-SOMA-SEED-v1.1](https://huggingface.co/nvidia/Kimodo-SOMA-SEED-v1.1#changes-in-v11).

## Getting Started

Please see the full documentation for detailed installation instructions, how to use the CLI and Interactive Demo, and other practical tips for generating motions with Kimodo:

**[Full Documentation](https://research.nvidia.com/labs/sil/projects/kimodo/docs)**

- [Quick Start Guide](https://research.nvidia.com/labs/sil/projects/kimodo/docs/getting_started/quick_start.html)
- [Installation Instructions](https://research.nvidia.com/labs/sil/projects/kimodo/docs/getting_started/installation.html)
- [Interactive Motion Authoring Demo](https://research.nvidia.com/labs/sil/projects/kimodo/docs/interactive_demo/index.html)
- [Command-Line Interface](https://research.nvidia.com/labs/sil/projects/kimodo/docs/user_guide/cli.html)
- [Benchmark Instructions](https://research.nvidia.com/labs/sil/projects/kimodo/docs/benchmark/introduction.html)
- [API Reference](https://research.nvidia.com/labs/sil/projects/kimodo/docs/api_reference/index.html)

**Before getting started** with motion generation, please review the [best practices](https://research.nvidia.com/labs/sil/projects/kimodo/docs/key_concepts/limitations.html) and be aware of [model limitations](https://research.nvidia.com/labs/sil/projects/kimodo/docs/key_concepts/limitations.html#limitations).

Some notes on the installation environment:

- Kimodo requires ~17 GB of VRAM to generate locally entirely on the GPU, primarily due to the text embedding model. If you have a smaller card, set `TEXT_ENCODER_DEVICE=cpu` when running Kimodo commands to force text encoding onto the CPU. This is slightly slower but reduces VRAM usage to under 3 GB.
- The model has been most extensively tested on GeForce RTX 3090, GeForce RTX 4090, and NVIDIA A100 GPUs, but it should work on other recent cards with sufficient VRAM.
- This repo was developed on Linux, though Windows should also work, especially when using Docker.
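For example, to run a generation with text encoding forced onto the CPU (a minimal sketch; the prompt and duration here are illustrative):

```
TEXT_ENCODER_DEVICE=cpu kimodo_gen "a person waves with both hands" --duration 5
```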
## Interactive Motion Authoring Demo
**[Demo Documentation and Tutorial](https://research.nvidia.com/labs/sil/projects/kimodo/docs/interactive_demo/index.html)**

The web-based interactive demo provides an intuitive interface for generating motions with any of the Kimodo model variations. After installation, the demo can be launched with the `kimodo_demo` command. It runs locally on http://127.0.0.1:7860. Open this URL in your browser to access the interface (or use port forwarding if set up on a server).

### Demo Features

- **Multiple Characters**: Supports generating with the SOMA, G1, and SMPL-X versions of Kimodo
- **Text Prompts**: Enter one or more natural language descriptions of desired motions on the timeline
- **Timeline Editor**: Add and edit keyframes and constrained intervals on multiple constraint tracks
- **Constraint Types**:
  - Full-Body: Complete joint position constraints at specific frames
  - 2D Root: Define waypoints or full paths to follow on the ground plane
  - End-Effectors: Control hand and foot positions/rotations
- **Constraint Editing**: Editing mode allows for re-posing of constraints or adjusting waypoints
- **3D Visualization**: Real-time rendering of generated motions with skeleton and skinned mesh options
- **Playback Controls**: Preview generated motions with adjustable playback speed
- **Multiple Samples**: Generate and compare multiple motion variations
- **Examples**: Load pre-existing examples to better understand Kimodo's capabilities
- **Export**: Save constraints and generated motions for later use

## Command-Line Interface

**[CLI Documentation and Examples](https://research.nvidia.com/labs/sil/projects/kimodo/docs/user_guide/cli.html)**

Motions can also be generated directly from the command line with the `kimodo_gen` command, or by running `python -m kimodo.scripts.generate`.

**Key Arguments:**

- `prompt`: A single text description or sequence of texts for the desired motion (required)
- `--model`: Which Kimodo model to use for generation
- `--duration`: Motion duration in seconds
- `--num_samples`: Number of motion variations to generate
- `--constraints`: Constraint file to control the generated motion (e.g., saved from the web demo)
- `--diffusion_steps`: Number of denoising steps
- `--cfg_type` / `--cfg_weight`: Classifier-free guidance (`nocfg`, `regular` with one weight, or `separated` with two weights for text vs. constraints); see the [CLI docs](https://research.nvidia.com/labs/sil/projects/kimodo/docs/user_guide/cli.html#classifier-free-guidance-cfg)
- `--no-postprocess`: Flag to disable foot skate and constraint cleanup post-processing
- `--seed`: Random seed for reproducible results

The script supports different output formats depending on which skeleton is used. By default, a custom NPZ format is saved that is compatible with the web demo. For Kimodo-G1 models, the motion can be saved in the standard MuJoCo qpos CSV format. For Kimodo-SMPLX, motion can be saved in the standard AMASS NPZ format for compatibility with existing pipelines.
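As an illustrative example combining several of these arguments (the prompt and values are placeholders; see the CLI docs for available model identifiers):

```
kimodo_gen "a person walks forward, then sits down" --duration 8 --num_samples 2 --seed 0
```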
### Default NPZ Output Format

Generated motions are saved as NPZ files containing:

- `posed_joints`: Global joint positions `[T, J, 3]`
- `global_rot_mats`: Global joint rotation matrices `[T, J, 3, 3]`
- `local_rot_mats`: Local (parent-relative) joint rotation matrices `[T, J, 3, 3]`
- `foot_contacts`: Foot contact labels [left heel, left toe, right heel, right toe] `[T, 4]`
- `smooth_root_pos`: Smoothed root positions output by the model `[T, 3]`
- `root_positions`: The (non-smoothed) trajectory of the actual root joint (e.g., pelvis) `[T, 3]`
- `global_root_heading`: The heading direction output from the model `[T, 2]`

Here `T` is the number of frames and `J` is the number of joints.
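As a minimal sketch of consuming this format in Python (the file path is an assumption; the keys are those listed above):

```python
import numpy as np

# Load a motion saved by kimodo_gen (the path is illustrative)
data = np.load("motion.npz")

posed_joints = data["posed_joints"]    # [T, J, 3] global joint positions
foot_contacts = data["foot_contacts"]  # [T, 4] per-frame contact labels

T, J, _ = posed_joints.shape
print(f"{T} frames, {J} joints")
# Assumes contact labels can be thresholded at 0.5 (binary or probability-like)
print(f"frames with any foot contact: {(foot_contacts.max(axis=1) > 0.5).mean():.1%}")
```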
## Low-Level Python API

**[Model API Documentation](https://research.nvidia.com/labs/sil/projects/kimodo/docs/api_reference/model.html#kimodo.model.kimodo_model.Kimodo.__call__)**

For maximum flexibility, the low-level model inference API can be called directly, rather than going through our high-level CLI. This allows for advanced model configuration, including classifier-free guidance weights and parameters related to transitions in multi-prompt sequences.

## Downstream Robotics Applications of Kimodo

### Visualizing G1 Motions with MuJoCo

After generating motions on the G1 robot skeleton and saving them in the MuJoCo qpos CSV format, they can easily be used and visualized within MuJoCo. A minimal visualization script is available with:

```
python -m kimodo.scripts.mujoco_load
```

Make sure to edit the script to correctly point to your CSV file, and install MuJoCo before running it.

### Tracking Generated Motions with ProtoMotions
[ProtoMotions](https://github.com/NVlabs/ProtoMotions) is a GPU-accelerated simulation and learning framework for training physically simulated digital humans and humanoid robots. The Kimodo NPZ and CSV output formats are both compatible with ProtoMotions, making it easy to train physics-based policies with motions generated by Kimodo. ProtoMotions supports outputs on both the SOMA skeleton and the Unitree G1.

After generating motions with Kimodo, head over to the [ProtoMotions docs](https://github.com/NVlabs/ProtoMotions?tab=readme-ov-file#-motion-authoring-with-kimodo) to see how to import them.

### Retargeting Motions to Other Robots with GMR
Motions generated by Kimodo-SMPLX can be retargeted to other robots using [General Motion Retargeting (GMR)](https://github.com/YanjieZe/GMR). GMR supports the AMASS NPZ format out of the box, so simply generate motions with Kimodo and use `--output` to save; the AMASS NPZ is written to `stem_amass.npz` (single sample) or in the output folder (multiple samples). Then, use the [SMPL-X to Robot script](https://github.com/YanjieZe/GMR?tab=readme-ov-file#retargeting-from-smpl-x-amass-omomo-to-robot) in GMR to retarget to any supported robot. For example:

```
# run within GMR codebase
python scripts/smplx_to_robot.py --smplx_file /path/to/saved/amass_format.npz --robot booster_t1
```

### Combining Kimodo with GEAR-SONIC
As a proof of concept, we have also incorporated Kimodo into the [interactive GEAR-SONIC demo](https://nvlabs.github.io/GEAR-SONIC/demo.html). In the demo, Kimodo can be used to generate a kinematic motion on the G1 robot skeleton, and GEAR-SONIC then tracks the motion in simulation.

## Kimodo Motion Generation Benchmark

[**[Benchmark Documentation](https://research.nvidia.com/labs/sil/projects/kimodo/docs/benchmark/introduction.html)**] [**[Test Suite on Hugging Face](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark)**]

Alongside the Kimodo models, we provide a benchmark designed to standardize evaluation of motion generation models with a comprehensive set of test cases. This includes:

* **Evaluation Data**: A suite of test cases [available on Hugging Face](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark) is used in concert with the [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) dataset to construct the full benchmark.
* **Diverse Test Cases**: Test cases cover a wide range of text-conditioned and constraint-conditioned motion generation.
* **Evaluation Pipeline**: Code for the full evaluation pipeline, including benchmark construction, motion generation, and evaluation.
* **Metrics**: Several metrics to evaluate generated motions, covering motion quality, constraint following, and text alignment. Our [TMR-SOMA-RP-v1](https://huggingface.co/nvidia/TMR-SOMA-RP-v1) model, trained on all 700 hours of the Bones Rigplay dataset, is a powerful embedding model for computing common metrics like R-precision and FID.

To facilitate future research, we [report benchmark results](https://research.nvidia.com/labs/sil/projects/kimodo/docs/benchmark/results.html) for the Kimodo-SOMA-v1.1 models, which are reproducible and easily comparable to other methods trained on the BONES-SEED data.
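The scripts under `benchmark/` implement the numbered pipeline steps. An illustrative end-to-end run might look like the following (the testsuite path is an assumption, and we assume `generate_eval.py` is the generation step; see the benchmark documentation for the exact commands):

```
python benchmark/create_benchmark.py my_testsuite --dataset datasets/bones-seed/soma_uniform  # (1) build test cases
python benchmark/generate_eval.py ...                                                         # (2) generate motions per test case
python benchmark/embed_folder.py my_testsuite --model tmr-soma-rp                             # (3) embed motions and text with TMR
python benchmark/evaluate_folder.py my_testsuite                                              # (4) compute and aggregate metrics
```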
## Timeline Annotations for BONES-SEED

As detailed in the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf), Kimodo is trained using fine-grained temporal text annotations of mocap clips. While the full [Rigplay 1](https://bones.studio/datasets#rp01) dataset is proprietary, we have released the temporal segmentations for the public [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) subset. These annotations are already included in the BONES-SEED dataset, but the standalone labels and additional information about them are [available on HuggingFace](https://huggingface.co/datasets/nvidia/SEED-Timeline-Annotations).

## Related Humanoid Work at NVIDIA

Kimodo is part of a larger effort to enable humanoid motion data for robotics, physical AI, and other applications. Check out these related works:

* [SOMA Body Model](https://github.com/NVlabs/SOMA-X) - a unified parametric human body model
* [BONES-SEED Dataset](https://huggingface.co/datasets/bones-studio/seed) - a large-scale human(oid) motion capture dataset in SOMA and G1 format
* [ProtoMotions](https://github.com/NVlabs/ProtoMotions) - simulation and learning framework for training physically simulated human(oid)s
* [SOMA Retargeter](https://github.com/NVIDIA/soma-retargeter) - SOMA to G1 retargeting tool
* [GEM](https://github.com/NVlabs/GEM-X) - human motion reconstruction from video
* [GEAR SONIC](https://github.com/NVlabs/GR00T-WholeBodyControl) - humanoid behavior foundation model for physical robots

## Citation

If you use this code in your research, please cite:

```bibtex
@article{Kimodo2026,
  title={Kimodo: Scaling Controllable Human Motion Generation},
  author={Rempe, Davis and Petrovich, Mathis and Yuan, Ye and Zhang, Haotian and Peng, Xue Bin and Jiang, Yifeng and Wang, Tingwu and Iqbal, Umar and Minor, David and de Ruyter, Michael and Li, Jiefeng and Tessler, Chen and Lim, Edy and Jeong, Eugene and Wu, Sam and Hassani, Ehsan and Huang, Michael and Yu, Jin-Bey and Chung, Chaeyeon and Song, Lina and Dionne, Olivier and Kautz, Jan and Yuen, Simon and Fidler, Sanja},
  journal={arXiv:2603.15546},
  year={2026}
}
```

## License

This codebase is licensed under [Apache-2.0](LICENSE). Note that model checkpoints and data are licensed separately, as indicated on the HuggingFace download pages.

This project will download and install additional third-party open source software projects. Review the license terms of these open source projects before use.

## Acknowledgments

This project builds upon excellent open-source projects:

- [Viser](https://github.com/nerfstudio-project/viser) for the 3D motion authoring demo
- [LLM2Vec](https://github.com/McGill-NLP/llm2vec) for text encoding

## Contact

For questions or issues, please open an issue on this repository or reach out directly to the authors.

---

================================================
FILE: benchmark/create_benchmark.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Step (1) of evaluation pipeline.

This script builds the benchmark test suites from BVH motions in the Bones-SEED dataset
using the benchmark metadata. Currently it is only set up for the SOMA skeleton.
"""
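# Illustrative invocation (the testsuite path is an assumption; the flags are those
# defined in main() below):
#   python benchmark/create_benchmark.py my_testsuite --dataset datasets/bones-seed/soma_uniform --workers 8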
""" import argparse from functools import partial from multiprocessing import Pool from pathlib import Path import numpy as np import torch from tqdm import tqdm from kimodo.geometry import matrix_to_axis_angle from kimodo.motion_rep import KimodoMotionRep from kimodo.skeleton import SOMASkeleton77 from kimodo.skeleton.bvh import parse_bvh_motion from kimodo.tools import load_json, save_json, to_numpy, to_torch FPS = 30 BENCHMARK_REPO_ID = "nvidia/Kimodo-Motion-Gen-Benchmark" def download_benchmark(dest: Path) -> Path: """Download the benchmark testsuite from HuggingFace to *dest*.""" from huggingface_hub import snapshot_download print(f"Downloading benchmark testsuite from {BENCHMARK_REPO_ID} to {dest} ...") snapshot_dir = snapshot_download( repo_id=BENCHMARK_REPO_ID, repo_type="dataset", local_dir=str(dest), ) return Path(snapshot_dir) def discover_seed_motion_folders(root: Path) -> list[Path]: """Find all directories under root that contain seed_motion.json; return sorted list of those dirs.""" root = root.resolve() if not root.is_dir(): raise FileNotFoundError(f"Folder does not exist: {root}") out: list[Path] = [] for meta_path in root.rglob("seed_motion.json"): src_dir = meta_path.parent out.append(src_dir) return sorted(out) def constraints_and_motion_from_seed(folder: str, dataset_folder: str, fps=FPS): """Load seed_motion.json and BVH from folder; subsample to fps, convert to SOMA gt_motion.npz and constraints.""" folder = Path(folder) dataset_folder = Path(dataset_folder) out_path = folder / "gt_motion.npz" seed_motion = load_json(folder / "seed_motion.json") start = seed_motion["crop_start_frame_index"] end = seed_motion["crop_end_frame_index"] bvh_path = dataset_folder / seed_motion["bvh_path"].replace("BVH/", "bvh/") local_rot_mats, root_trans, bvh_fps = parse_bvh_motion(bvh_path) step = round(bvh_fps / fps) # Subsample fps root_trans = root_trans[::step] local_rot_mats = local_rot_mats[::step] skeleton = SOMASkeleton77() # Changing t_pose: essential step local_rot_mats, global_rot_mats = skeleton.to_standard_tpose(local_rot_mats) # Use the motion rep to canonicalize the motion (start z+ at 0,0) # and get other components (smooth root, foot contacts etc) motion_rep = KimodoMotionRep(skeleton, fps) feats = motion_rep(local_rot_mats, root_trans, to_normalize=False) # Crop the features and canonicalizing them feats = feats[start:end] can_feats = motion_rep.canonicalize(feats) # Get back the motion motion = motion_rep.inverse(can_feats, is_normalized=False) motion = to_numpy(to_torch(motion, dtype=torch.float32)) np.savez(out_path, **motion) seed_constraints_path = folder / "seed_constraints.json" if seed_constraints_path.exists(): seed_constraints_lst = load_json(seed_constraints_path) constraints_lst = [] for seed_cons in seed_constraints_lst: cons = seed_cons.copy() frame_indices = cons["frame_indices"] cons["smooth_root_2d"] = motion["smooth_root_pos"][frame_indices][..., [0, 2]].tolist() if cons["type"] == "root2d": if cons.get("use_global_orient", False): cons["global_root_heading"] = motion["global_root_heading"][ # noqa frame_indices ].tolist() elif cons["type"] in ["fullbody"] or cons["type"] in [ "left-hand", "right-hand", "left-foot", "right-foot", "end-effector", ]: cons["local_joints_rot"] = matrix_to_axis_angle( to_torch(motion["local_rot_mats"][frame_indices]) ).tolist() cons["root_positions"] = motion["root_positions"][frame_indices].tolist() else: raise TypeError(f"This constraint type is not recognized: {cons['type']}") constraints_lst.append(cons) # check that it 
def main():
    parser = argparse.ArgumentParser(
        description="Recursively find test cases to fill with motions and constraints.",
    )
    parser.add_argument(
        "benchmark",
        type=Path,
        help="Root folder to search recursively for seed_motion.json, or the destination to download the benchmark testsuite from HuggingFace to.",
    )
    parser.add_argument(
        "--dataset",
        type=Path,
        default="datasets/bones-seed/soma_uniform",
        help="SEED dataset folder",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="Redo the process even if gt_motion.npz already exists",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=1,
        help="Number of parallel worker processes (default: 1, sequential)",
    )
    args = parser.parse_args()

    folder = args.benchmark.resolve()
    if not folder.is_dir():
        print(f"Benchmark folder not found at {folder}, downloading from HuggingFace...")
        download_benchmark(folder)

    dirs = discover_seed_motion_folders(folder)
    if not dirs:
        raise SystemExit(f"No directories with seed_motion.json found under {folder}")
    print(f"Discovered {len(dirs)} motions to populate.")

    skipped = 0
    to_process = []
    for d in dirs:
        if not args.overwrite and (d / "gt_motion.npz").is_file():
            skipped += 1
        else:
            to_process.append(d)

    fn = partial(constraints_and_motion_from_seed, dataset_folder=args.dataset)
    with Pool(args.workers) as pool:
        list(tqdm(pool.imap_unordered(fn, to_process), total=len(to_process), desc="Extracting GT motions"))

    if skipped:
        print(f"Processed {len(dirs) - skipped} folders, skipped {skipped} (already present).")
    else:
        print("Saved gt_motion.npz and constraints.json from the seed files.")


if __name__ == "__main__":
    main()

================================================
FILE: benchmark/embed_folder.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Step (3) of evaluation pipeline.

This script recursively embeds generated motions, ground-truth motions, and text prompts
from a test suite folder tree with the pre-trained TMR model.
"""
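# Illustrative invocation (the testsuite path is an assumption; the flags are those
# defined in main() below):
#   python benchmark/embed_folder.py my_testsuite --model tmr-soma-rp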
import argparse
from pathlib import Path

import numpy as np
import torch
from tqdm import tqdm

from kimodo.meta import parse_prompts_from_meta
from kimodo.model.load_model import load_model
from kimodo.tools import load_json


def discover_motion_folders(root: Path) -> list[Path]:
    root = root.resolve()
    if not root.is_dir():
        raise FileNotFoundError(f"Folder does not exist: {root}")
    out: list[Path] = []
    for meta_path in root.rglob("meta.json"):
        src_dir = meta_path.parent
        if (src_dir / "motion.npz").is_file() or (src_dir / "gt_motion.npz").is_file():
            out.append(src_dir)
    return sorted(out)


def _load_posed_joints(npz_path: Path, device: str) -> torch.Tensor:
    data = np.load(npz_path)
    if "posed_joints" not in data:
        raise SystemExit(f"NPZ must contain 'posed_joints': {npz_path}")
    posed_joints = data["posed_joints"]
    if posed_joints.ndim == 4:
        if posed_joints.shape[0] != 1:
            raise SystemExit(f"Expected batch size 1 for posed_joints, got {posed_joints.shape[0]} in {npz_path}")
        posed_joints = posed_joints[0]
    if posed_joints.ndim != 3:
        raise SystemExit(f"Expected posed_joints shape [T, J, 3], got {posed_joints.shape} in {npz_path}")
    return torch.from_numpy(posed_joints).float().to(device)


def main():
    parser = argparse.ArgumentParser(
        description="Recursively embed motion, gt_motion, and text; save motion_embedding.npy, gt_motion_embedding.npy, and text_embedding.npy when present.",
    )
    parser.add_argument(
        "folder",
        type=Path,
        help="Root folder to search recursively for meta.json and motion.npz and/or gt_motion.npz",
    )
    parser.add_argument(
        "--model",
        default="tmr-soma-rp",
        help="Model for encoding (e.g. TMR-SOMA-RP-v1, tmr-soma-rp). Default: tmr-soma-rp",
    )
    parser.add_argument(
        "--device",
        default=None,
        help="Device (default: cuda if available else cpu)",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="Re-embed even if embedding files already exist",
    )
    parser.add_argument(
        "--text_encoder_fp32",
        action="store_true",
        help="Uses fp32 for the text encoder rather than the default bfloat16.",
    )
    args = parser.parse_args()

    folder = args.folder.resolve()
    if not folder.is_dir():
        raise SystemExit(f"Folder does not exist or is not a directory: {folder}")

    device = args.device or ("cuda" if torch.cuda.is_available() else "cpu")
    model = load_model(modelname=args.model, device=device, default_family="TMR", text_encoder_fp32=args.text_encoder_fp32)

    dirs = discover_motion_folders(folder)
    if not dirs:
        raise SystemExit(f"No directories with meta.json and (motion.npz or gt_motion.npz) found under {folder}")
    print(f"Discovered {len(dirs)} motion folders.")

    skipped_motion = 0
    skipped_gt = 0
    skipped_text = 0
    for sample_dir in tqdm(dirs, desc="Embedding"):
        meta_path = sample_dir / "meta.json"
        meta = load_json(meta_path)
        texts, _ = parse_prompts_from_meta(meta)
        if len(texts) != 1:
            raise SystemExit(f"Expected exactly one text per motion; got {len(texts)} in {meta_path}")
        text = texts[0]

        # Embed motion.npz -> motion_embedding.npy
        if (sample_dir / "motion.npz").is_file():
            if not args.overwrite and (sample_dir / "motion_embedding.npy").is_file():
                skipped_motion += 1
            else:
                npz_path = sample_dir / "motion.npz"
                posed_joints = _load_posed_joints(npz_path, device)
                with torch.inference_mode():
                    motion_emb = model.encode_motion(posed_joints, unit_vector=True)
                np.save(sample_dir / "motion_embedding.npy", motion_emb.cpu().numpy())

        # Embed gt_motion.npz -> gt_motion_embedding.npy
        if (sample_dir / "gt_motion.npz").is_file():
            if not args.overwrite and (sample_dir / "gt_motion_embedding.npy").is_file():
                skipped_gt += 1
            else:
npz_path = sample_dir / "gt_motion.npz" posed_joints = _load_posed_joints(npz_path, device) with torch.inference_mode(): gt_motion_emb = model.encode_motion(posed_joints, unit_vector=True) np.save(sample_dir / "gt_motion_embedding.npy", gt_motion_emb.cpu().numpy()) # Embed text -> text_embedding.npy if not args.overwrite and (sample_dir / "text_embedding.npy").is_file(): skipped_text += 1 else: with torch.inference_mode(): text_emb = model.encode_raw_text([text], unit_vector=True) np.save(sample_dir / "text_embedding.npy", text_emb.cpu().numpy()) total_skipped = skipped_motion + skipped_gt + skipped_text if total_skipped: print(f"Embedded {len(dirs)} folders; skipped {total_skipped} existing embedding files (use --overwrite to re-embed).") else: print(f"Saved motion_embedding.npy, gt_motion_embedding.npy, and text_embedding.npy in {len(dirs)} folders.") if __name__ == "__main__": main() ================================================ FILE: benchmark/evaluate_folder.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """ Step (4) of evaluation pipeline. This script recursively computes metrics for generated and ground-truth motions within a test suite folder tree. Saves metrics json files per test case and per group of test cases in the folder tree. """ import argparse import json from itertools import groupby from pathlib import Path from typing import Any import numpy as np import torch from tqdm import tqdm from kimodo.constraints import load_constraints_lst from kimodo.meta import parse_prompts_from_meta from kimodo.metrics import ( ContraintFollow, FootContactConsistency, FootSkateFromContacts, FootSkateFromHeight, FootSkateRatio, TMR_EmbeddingMetric, aggregate_metrics, clear_metrics, compute_metrics, compute_tmr_per_sample_retrieval, ) from kimodo.skeleton import build_skeleton from kimodo.skeleton.definitions import SOMASkeleton30 from kimodo.tools import load_json, to_torch DEFAULT_FPS = 30.0 def discover_motion_folders(root: Path) -> list[tuple[Path, Path]]: root = root.resolve() if not root.is_dir(): raise FileNotFoundError(f"Folder does not exist: {root}") out: list[tuple[Path, Path]] = [] for meta_path in root.rglob("meta.json"): sample_dir = meta_path.parent if (sample_dir / "motion.npz").is_file() and (sample_dir / "gt_motion.npz").is_file(): rel = sample_dir.relative_to(root) out.append((sample_dir, rel)) return sorted(out, key=lambda x: str(x[1])) def group_by_parent(examples: list[tuple[Path, Path]]) -> list[list[tuple[Path, Path]]]: def parent_key(item: tuple[Path, Path]) -> Path: return item[1].parent if len(item[1].parts) > 1 else Path(".") sorted_examples = sorted(examples, key=parent_key) groups: list[list[tuple[Path, Path]]] = [] for _key, group in groupby(sorted_examples, key=parent_key): groups.append(list(group)) return groups def _to_scalar(t: torch.Tensor) -> float: # .item() on an empty tensor raises, so report NaN for empty inputs instead. return float(t.mean().item()) if t.numel() > 0 else float("nan") def _to_p95(t: torch.Tensor) -> float: if t.numel() == 0: return float("nan") return float(torch.nanquantile(t, torch.tensor(0.95, device=t.device), dim=0).item()) def _per_sample_metrics_from_saved(metrics_list: list, n: int) -> list[dict[str, float]]: per_sample: list[dict[str, float]] = [{} for _ in range(n)] for metric in metrics_list: for key, lst in metric.saved_metrics.items(): for i, t in enumerate(lst): if i >= n: break per_sample[i][key] = _to_scalar(t) return per_sample def _load_pair_embeddings( sample_dir:
Path, ) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None: motion_emb_path = sample_dir / "motion_embedding.npy" text_emb_path = sample_dir / "text_embedding.npy" gt_motion_emb_path = sample_dir / "gt_motion_embedding.npy" if not (motion_emb_path.is_file() and text_emb_path.is_file()): return None motion_emb = np.load(motion_emb_path) text_emb = np.load(text_emb_path) if motion_emb.ndim == 3 and motion_emb.shape[0] == 1: motion_emb = motion_emb[0] if text_emb.ndim == 3 and text_emb.shape[0] == 1: text_emb = text_emb[0] gt_motion_emb = None if gt_motion_emb_path.is_file(): gt_motion_emb = np.load(gt_motion_emb_path) if gt_motion_emb.ndim == 3 and gt_motion_emb.shape[0] == 1: gt_motion_emb = gt_motion_emb[0] return motion_emb, text_emb, gt_motion_emb def _load_npz_motion( npz_path: Path, device: str, soma30_skel: SOMASkeleton30 | None = None, ) -> tuple[torch.Tensor, torch.Tensor]: """Load posed_joints and foot_contacts from an NPZ, upscaling SOMA30 to SOMA77 if needed.""" data = np.load(npz_path) posed_joints = to_torch(data["posed_joints"], device=device) foot_contacts = to_torch(data["foot_contacts"], device=device) if posed_joints.shape[-2] == 30 and soma30_skel is not None: local_rot_mats = to_torch(data["local_rot_mats"], device=device) root_positions = to_torch(data["root_positions"], device=device) out77 = soma30_skel.output_to_SOMASkeleton77( {"local_rot_mats": local_rot_mats, "root_positions": root_positions, "foot_contacts": foot_contacts} ) posed_joints = out77["posed_joints"] foot_contacts = out77["foot_contacts"] return posed_joints, foot_contacts def _run_eval_on_group( group: list[tuple[Path, Path]], skeleton: torch.nn.Module, metrics_list: list, device: str, group_name: str = "", soma30_skel: SOMASkeleton30 | None = None, ) -> tuple[ list[dict[str, float]], list[dict[str, float]], dict[str, float], dict[str, float], dict[str, float], list[dict[str, Any]], ]: """Run two passes: gen (motion.npz + embeddings) and GT (gt_motion.npz only). Return per_sample_gen, per_sample_gt, aggregated_gen, aggregated_gt, tmr_metrics, tmr_per_sample. 
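For example (keys taken from the metrics below, values illustrative): aggregated_gen may look like {"foot_skate_from_pred_contacts": 0.031, "constraint_root2d_err": 0.012, "constraint_root2d_err_p95": 0.034}; tmr_metrics collects only the "TMR/"-prefixed aggregates, and tmr_per_sample holds one retrieval record per sample with "rank" and "top_k" entries.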
""" n = len(group) sample_ids: list[str] = [] texts: list[str] = [] motion_embs: list[np.ndarray] = [] text_embs: list[np.ndarray] = [] # ----- Pass 1: generation (motion.npz + all embeddings) ----- clear_metrics(metrics_list) desc = f"Samples ({group_name})" if group_name else "Samples" for sample_dir, rel_path in tqdm(group, desc=desc, unit="motion"): stem = rel_path.name sample_ids.append(stem) meta_path = sample_dir / "meta.json" meta = load_json(meta_path) texts_parsed, _ = parse_prompts_from_meta(meta) texts.append(texts_parsed[0] if texts_parsed else "") posed_joints, foot_contacts = _load_npz_motion(sample_dir / "motion.npz", device, soma30_skel) nframes = posed_joints.shape[0] lengths = torch.tensor(nframes, dtype=torch.long, device=device) constraints_path = sample_dir / "constraints.json" constraints_lst = ( load_constraints_lst(str(constraints_path), skeleton=skeleton) if constraints_path.is_file() else [] ) metrics_in: dict[str, Any] = { "posed_joints": posed_joints, "foot_contacts": foot_contacts, "lengths": lengths, "constraints_lst": constraints_lst, } text_this = texts_parsed[0] if texts_parsed else "" embs = _load_pair_embeddings(sample_dir) if (text_this or "").strip() and embs is not None: motion_emb, text_emb, gt_motion_emb = embs metrics_in["motion_emb"] = motion_emb metrics_in["text_emb"] = text_emb if gt_motion_emb is not None: metrics_in["gt_motion_emb"] = gt_motion_emb motion_embs.append(motion_emb) text_embs.append(text_emb) compute_metrics(metrics_list, metrics_in) per_sample_gen = _per_sample_metrics_from_saved(metrics_list, n) raw_aggregated_gen = aggregate_metrics(metrics_list) aggregated_gen = {} tmr_metrics: dict[str, float] = {} has_text = len(motion_embs) == n and len(text_embs) == n for key, v in raw_aggregated_gen.items(): val = _to_scalar(v) if key.startswith("TMR/"): if has_text: tmr_metrics[key] = val else: aggregated_gen[key] = val if "constraint_root2d_err" in raw_aggregated_gen: aggregated_gen["constraint_root2d_err_p95"] = _to_p95(raw_aggregated_gen["constraint_root2d_err"]) tmr_per_sample: list[dict[str, Any]] = [] if has_text and motion_embs and text_embs and len(motion_embs) == n and len(text_embs) == n: motion_emb_stack = np.stack(motion_embs, axis=0) text_emb_stack = np.stack(text_embs, axis=0) tmr_per_sample = compute_tmr_per_sample_retrieval(motion_emb_stack, text_emb_stack, sample_ids, texts, top_k=5) # ----- Pass 2: GT (gt_motion.npz only, no embeddings) ----- clear_metrics(metrics_list) for sample_dir, rel_path in tqdm(group, desc=f"GT ({group_name})" if group_name else "GT", unit="motion"): posed_joints, foot_contacts = _load_npz_motion(sample_dir / "gt_motion.npz", device, soma30_skel) nframes = posed_joints.shape[0] lengths = torch.tensor(nframes, dtype=torch.long, device=device) constraints_path = sample_dir / "constraints.json" constraints_lst = ( load_constraints_lst(str(constraints_path), skeleton=skeleton) if constraints_path.is_file() else [] ) metrics_in = { "posed_joints": posed_joints, "foot_contacts": foot_contacts, "lengths": lengths, "constraints_lst": constraints_lst, } compute_metrics(metrics_list, metrics_in) per_sample_gt = _per_sample_metrics_from_saved(metrics_list, n) raw_aggregated_gt = aggregate_metrics(metrics_list) aggregated_gt = {} for key, v in raw_aggregated_gt.items(): if key.startswith("TMR/"): continue aggregated_gt[key] = _to_scalar(v) if "constraint_root2d_err" in raw_aggregated_gt: aggregated_gt["constraint_root2d_err_p95"] = _to_p95(raw_aggregated_gt["constraint_root2d_err"]) return ( per_sample_gen, 
per_sample_gt, aggregated_gen, aggregated_gt, tmr_metrics, tmr_per_sample, ) def _write_json(path: Path, payload: dict[str, Any]) -> None: path.parent.mkdir(parents=True, exist_ok=True) path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") def main(): parser = argparse.ArgumentParser( description="Recursively evaluate generated motions; write metrics.json per folder and <folder_name>.json per parent.", ) parser.add_argument( "folder", type=Path, help="Root folder to search recursively for meta.json + motion.npz + gt_motion.npz", ) parser.add_argument("--device", default=None, help="cuda/cpu. Default: auto") args = parser.parse_args() folder = args.folder.resolve() if not folder.is_dir(): raise SystemExit(f"Folder does not exist: {folder}") device = args.device or ("cuda" if torch.cuda.is_available() else "cpu") examples = discover_motion_folders(folder) if not examples: raise SystemExit(f"No directories with meta.json, motion.npz, and gt_motion.npz found under {folder}") print(f"Discovered {len(examples)} motion folders.") first_posed = np.load(examples[0][0] / "motion.npz")["posed_joints"] num_joints = first_posed.shape[-2] # SOMA models could generate 30-joint output; upscale to 77 for evaluation soma30_skel: SOMASkeleton30 | None = None if num_joints == 30: soma30_skel = SOMASkeleton30().to(device) _ = soma30_skel.somaskel77 # trigger lazy init soma30_skel.somaskel77.to(device) skeleton = soma30_skel.somaskel77 print("Detected SOMA30 motions; will upscale to SOMA77 for evaluation.") else: skeleton = build_skeleton(num_joints).to(device) fps = DEFAULT_FPS kwargs = {"skeleton": skeleton, "fps": fps} metrics_list = [ FootSkateFromHeight(**kwargs), FootSkateFromContacts(**kwargs), FootContactConsistency(**kwargs), FootSkateRatio(**kwargs), ContraintFollow(**kwargs), TMR_EmbeddingMetric(**kwargs), ] groups = group_by_parent(examples) for group in tqdm(groups, desc="Evaluating folders"): sample_dirs = [g[0] for g in group] folder_for_group = sample_dirs[0].parent folder_name = folder_for_group.name ( per_sample_gen, per_sample_gt, aggregated_gen, aggregated_gt, tmr_metrics, tmr_per_sample, ) = _run_eval_on_group(group, skeleton, metrics_list, device, group_name=folder_name, soma30_skel=soma30_skel) texts = [] for sample_dir, _ in group: meta = load_json(sample_dir / "meta.json") texts_parsed, _ = parse_prompts_from_meta(meta) texts.append(texts_parsed[0] if texts_parsed else "") for i, (sample_dir, _) in enumerate(group): metrics_path = sample_dir / "metrics.json" out = { "num_motions": 1, "folder": str(sample_dir), "per_motion_mean_gen": per_sample_gen[i] if i < len(per_sample_gen) else {}, "per_motion_mean_gt": per_sample_gt[i] if i < len(per_sample_gt) else {}, } if i < len(tmr_per_sample): out["tmr"] = { "t2m_rank": tmr_per_sample[i]["rank"], "text": texts[i] if i < len(texts) else "", "top5_retrieved": tmr_per_sample[i]["top_k"], } _write_json(metrics_path, out) parent_json_path = folder_for_group.parent / f"{folder_name}.json" full_metrics = { "num_motions": len(group), "folder": str(folder_for_group), "per_motion_mean_gen": aggregated_gen, "per_motion_mean_gt": aggregated_gt, } if tmr_metrics: full_metrics["tmr"] = tmr_metrics _write_json(parent_json_path, full_metrics) print(f"Wrote metrics.json in each of {len(examples)} folders and folder-level JSONs for {len(groups)} groups.") if __name__ == "__main__": main() ================================================ FILE: benchmark/generate_eval.py ================================================ # SPDX-FileCopyrightText: Copyright
(c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """ Step (2) of evaluation pipeline. This script recursively generates motions using Kimodo from a test suite folder tree. """ import argparse import shutil from pathlib import Path from typing import Any import numpy as np import torch from torch.utils.data import DataLoader, Dataset from tqdm.auto import tqdm from kimodo.constraints import load_constraints_lst from kimodo.meta import parse_prompts_from_meta from kimodo.model import DEFAULT_MODEL, load_model from kimodo.tools import load_json, seed_everything def parse_args(): parser = argparse.ArgumentParser(description="Recursively generate motions from a testsuite folder tree") parser.add_argument( "--benchmark", type=str, default="testsuite", help="Root folder containing subfolders with meta.json (default: testsuite)", ) parser.add_argument( "--output", type=str, default=None, help="Output root; directory hierarchy is mirrored here. If omitted, motions are generated in-place inside the testsuite folder.", ) parser.add_argument( "--batch_size", type=int, default=32, help="Batch size for generating motions (default: 32)", ) parser.add_argument( "--num_workers", type=int, default=4, help="DataLoader workers for loading meta/constraints paths (default: 4)", ) parser.add_argument( "--model", type=str, default=DEFAULT_MODEL, help="Name of the model (e.g. Kimodo-SOMA-RP-v1.1, kimodo-soma-rp, or SOMA).", ) parser.add_argument( "--diffusion_steps", type=int, default=100, help="Number of diffusion steps (default: 100); overridden by meta.json if present", ) parser.add_argument( "--postprocess", action="store_true", help="Apply motion post-processing to reduce foot skating", ) parser.add_argument( "--overwrite", action="store_true", help="Regenerate outputs even if motion.npz already exists", ) parser.add_argument( "--text_encoder_fp32", action="store_true", help="Uses fp32 for instantiating the text encoder (if API is not already running) rather than default bfloat16.", ) return parser.parse_args() def discover_example_folders(root: Path) -> list[tuple[Path, Path]]: """Discover leaf directories that contain meta.json. Returns list of (src_dir, rel_path). """ root = root.resolve() if not root.is_dir(): raise FileNotFoundError(f"Testsuite folder does not exist: {root}") out: list[tuple[Path, Path]] = [] for meta_path in root.rglob("meta.json"): src_dir = meta_path.parent rel = src_dir.relative_to(root) out.append((src_dir, rel)) return sorted(out, key=lambda x: str(x[1])) def copy_source_files(src_dir: Path, out_dir: Path) -> None: """Copy meta.json, constraints.json, and gt_motion.npz (if present) from src_dir to out_dir.""" out_dir.mkdir(parents=True, exist_ok=True) for name in ("meta.json", "constraints.json", "gt_motion.npz"): src_file = src_dir / name if src_file.is_file(): shutil.copy2(src_file, out_dir / name) class EvalExampleDataset(Dataset): """Dataset of example folders: yields text, num_frame, constraints_path (and paths, meta). No torch/skeleton in workers so num_workers > 0 is safe with CUDA. 
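A returned item is a plain dict, e.g. (values illustrative): {"rel_path": Path("content/text2motion/overview/001"), "text": "a person walks forward", "num_frame": 120, "constraints_path": None, ...}, plus "src_dir", "out_dir", and "meta" entries.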
""" def __init__( self, examples: list[tuple[Path, Path]], testsuite_root: Path, generated_root: Path, fps: float, ): self.examples = examples self.testsuite_root = testsuite_root self.generated_root = generated_root self.fps = fps def __len__(self) -> int: return len(self.examples) def __getitem__(self, idx: int) -> dict[str, Any]: src_dir, rel_path = self.examples[idx] out_dir = self.generated_root / rel_path meta_path = src_dir / "meta.json" meta = load_json(str(meta_path)) assert meta.get("num_samples", 1) == 1, "Expected num_samples to be absent or 1 in meta.json" texts, durations_sec = parse_prompts_from_meta(meta) assert len(texts) == 1, "Expected exactly one prompt (len(texts)==1) per example" num_frames = [int(float(d) * self.fps) for d in durations_sec] assert len(num_frames) == 1, "Expected exactly one duration per example" constraints_path = src_dir / "constraints.json" cpath = str(constraints_path) if constraints_path.is_file() else None return { "rel_path": rel_path, "src_dir": str(src_dir), "out_dir": str(out_dir), "meta": meta, "text": texts[0], "num_frame": num_frames[0], "constraints_path": cpath, } def collate_examples(batch: list[dict]) -> dict[str, Any]: """Collate list of example dicts; keep list fields as lists (no stacking).""" if not batch: return {} keys = batch[0].keys() out: dict[str, Any] = {} for k in keys: vals = [b[k] for b in batch] out[k] = vals return out def group_by_parent( examples: list[tuple[Path, Path]], ) -> list[list[tuple[Path, Path]]]: """Group (src_dir, rel_path) by parent directory of rel_path for folder-by-folder processing.""" from itertools import groupby def parent_key(item: tuple[Path, Path]) -> Path: rel = item[1] return rel.parent if len(rel.parts) > 1 else Path(".") sorted_examples = sorted(examples, key=parent_key) groups: list[list[tuple[Path, Path]]] = [] for _key, group in groupby(sorted_examples, key=parent_key): groups.append(list(group)) return groups def _slice_output_at(output: dict[str, Any], index: int) -> dict[str, Any]: """Slice a (possibly nested) output dict at batch index for one sample.""" out: dict[str, Any] = {} for k, v in output.items(): if isinstance(v, dict): out[k] = _slice_output_at(v, index) elif isinstance(v, np.ndarray) and v.ndim > 0: out[k] = v[index] else: out[k] = v return out def _crop_output(output: dict[str, Any], num_frames: int) -> dict[str, Any]: """Crop a single-sample output dict along the time dimension (axis 0).""" out: dict[str, Any] = {} for k, v in output.items(): if isinstance(v, dict): out[k] = _crop_output(v, num_frames) elif isinstance(v, np.ndarray) and v.ndim >= 1: out[k] = v[:num_frames] else: out[k] = v return out def main(): device = "cuda:0" if torch.cuda.is_available() else "cpu" print(f"Using device: {device}") args = parse_args() testsuite_root = Path(args.benchmark).resolve() if args.output is not None: generated_root = Path(args.output).resolve() else: generated_root = testsuite_root in_place = generated_root == testsuite_root examples = discover_example_folders(testsuite_root) if not examples: raise SystemExit(f"No folders with meta.json found under {testsuite_root}") print(f"Discovered {len(examples)} example folders.") model, resolved_name = load_model( args.model, device=device, default_family="Kimodo", return_resolved_name=True, text_encoder_fp32=args.text_encoder_fp32, ) # v1.1 models are meant to be used for benchmark evaluation _deprecated_for_benchmark = { "kimodo-soma-rp-v1": "Kimodo-SOMA-RP-v1 was not trained to be compatible with the benchmark evaluation.", 
"kimodo-soma-seed-v1": "Kimodo-SOMA-SEED-v1 is not the latest model for benchmark evaluation.", } if resolved_name in _deprecated_for_benchmark: import warnings warnings.warn( f"Model '{args.model}' resolved to {resolved_name}: " f"{_deprecated_for_benchmark[resolved_name]} Consider using v1.1.", stacklevel=1, ) print(f"Generating with model: {resolved_name}") fps = model.fps default_diffusion_steps = args.diffusion_steps groups = group_by_parent(examples) total_generated = 0 total_skipped = 0 total_examples = len(examples) for group in groups: rel_path_0 = group[0][1] if rel_path_0.parent != Path("."): folder_label = str(rel_path_0.parent) else: # Direct children of testsuite root: show root name (e.g. inbetweening) folder_label = testsuite_root.name num_in_folder = len(group) print(f"Generating folder: {folder_label} ({num_in_folder} motions)") dataset = EvalExampleDataset( group, testsuite_root, generated_root, fps=fps, ) loader = DataLoader( dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_examples, ) folder_generated = 0 folder_skipped = 0 for batch_idx, batch in enumerate(loader): rel_paths = batch["rel_path"] src_dirs = batch["src_dir"] out_dirs = batch["out_dir"] metas = batch["meta"] batch_texts = batch["text"] batch_num_frames = batch["num_frame"] constraints_paths = batch["constraints_path"] # Filter out samples that are already generated (unless --overwrite). if args.overwrite: selected_indices = list(range(len(rel_paths))) else: selected_indices = [] for i, out_dir_str in enumerate(out_dirs): motion_path = Path(out_dir_str) / "motion.npz" if motion_path.is_file(): folder_skipped += 1 total_skipped += 1 continue selected_indices.append(i) if not selected_indices: print( f"\r Generated {folder_generated} / {num_in_folder} (skipped: {folder_skipped}) " f"(total: {total_generated + total_skipped} / {total_examples})", end="", flush=True, ) continue rel_paths = [rel_paths[i] for i in selected_indices] src_dirs = [src_dirs[i] for i in selected_indices] out_dirs = [out_dirs[i] for i in selected_indices] metas = [metas[i] for i in selected_indices] batch_texts = [batch_texts[i] for i in selected_indices] batch_num_frames = [batch_num_frames[i] for i in selected_indices] constraints_paths = [constraints_paths[i] for i in selected_indices] # Load constraints in main process on model device (no torch in workers) device_t = torch.device(device) batch_constraints_lst = [ load_constraints_lst(cpath, model.skeleton, device=device_t) if cpath else [] for cpath in constraints_paths ] if not in_place: for i in range(len(rel_paths)): copy_source_files(Path(src_dirs[i]), Path(out_dirs[i])) # Use first example's diffusion_steps and seed for the whole batch diffusion_steps = metas[0].get("diffusion_steps", default_diffusion_steps) seed = metas[0].get("seed", None) if seed is not None: seed_everything(seed) else: print("Warning: No seed found in meta.json, not seeding this batch.") # Single model call for the entire batch (count in bar title, bar clears when done) bar_desc = ( f" Generated {folder_generated} / {num_in_folder} " f"(skipped: {folder_skipped}) (total: {total_generated + total_skipped} / {total_examples})" ) output = model( batch_texts, batch_num_frames, constraint_lst=batch_constraints_lst, num_denoising_steps=diffusion_steps, multi_prompt=False, post_processing=args.postprocess, return_numpy=True, progress_bar=lambda x: tqdm(x, leave=False, desc=bar_desc), ) # Save each sample to its output dir B = len(batch_texts) for b in range(B): 
out_dir = Path(out_dirs[b]) sample_output = _slice_output_at(output, b) sample_output = _crop_output(sample_output, batch_num_frames[b]) motion_path = out_dir / "motion.npz" np.savez(motion_path, **sample_output) total_generated += 1 folder_generated += 1 print( f"\r Generated {folder_generated} / {num_in_folder} (skipped: {folder_skipped}) " f"(total: {total_generated + total_skipped} / {total_examples})", end="", flush=True, ) print() print( f" Finished folder {folder_label} ({num_in_folder} motions, " f"generated: {folder_generated}, skipped: {folder_skipped})." ) if in_place: print(f"Generated {total_generated} motions in-place under {testsuite_root}.") else: print(f"Generated {total_generated} motions under {generated_root}.") if __name__ == "__main__": main() ================================================ FILE: benchmark/parse_folder.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """ Step (5) of evaluation pipeline. Validate testcase result JSONs and aggregate benchmark rows. Expected testsuite layout (aligned with evaluate_folder output; <...> names are placeholders):

<root>/
├── <split>/                      # e.g. content, repetition
│   ├── text2motion/              # text-following eval
│   │   ├── overview/             # or timeline_single, timeline_multi
│   │   │   └── <testcase>.json
│   │   └── ...
│   └── <category>/               # constraints_withtext, constraints_notext
│       └── .../                  # optional subdirs, e.g. root, fullbody
│           ├── <testcase>/
│           └── <testcase>.json

Samples are discovered via rglob('meta.json') with motion.npz and gt_motion.npz in the same dir. Testcase dir = parent of a sample dir. Result file = testcase_dir.parent / f"{testcase_dir.name}.json". """ from __future__ import annotations import argparse import json from collections import defaultdict from pathlib import Path from typing import Any SPLITS = ("content", "repetition") TEXT_FOLLOWING_CATEGORIES = ("overview", "timeline_single", "timeline_multi") CONSTRAINTS_CATEGORIES = ("constraints_withtext", "constraints_notext") ROW_CATEGORIES = TEXT_FOLLOWING_CATEGORIES + CONSTRAINTS_CATEGORIES def _discover_sample_dirs(root: Path) -> list[Path]: sample_dirs: list[Path] = [] for meta_path in root.rglob("meta.json"): sample_dir = meta_path.parent if (sample_dir / "motion.npz").is_file() and (sample_dir / "gt_motion.npz").is_file(): sample_dirs.append(sample_dir) return sorted(set(sample_dirs)) def _discover_testcase_dirs(root: Path) -> list[Path]: sample_dirs = _discover_sample_dirs(root) return sorted({sample_dir.parent for sample_dir in sample_dirs}) def _expected_result_path(testcase_dir: Path) -> Path: return testcase_dir.parent / f"{testcase_dir.name}.json" def _parse_testcase_key(root: Path, testcase_dir: Path) -> tuple[str, str]: rel_parts = testcase_dir.relative_to(root).parts if len(rel_parts) < 2: raise ValueError(f"Unexpected testcase path shape: {testcase_dir} (relative: {'/'.join(rel_parts)})") split = rel_parts[0] if split not in SPLITS: raise ValueError(f"Unknown split '{split}' for testcase {testcase_dir}") if len(rel_parts) >= 3 and rel_parts[1] == "text2motion": category = rel_parts[2] if category not in TEXT_FOLLOWING_CATEGORIES: raise ValueError(f"Unknown text-following category '{category}' for testcase {testcase_dir}") else: category = rel_parts[1] if category not in CONSTRAINTS_CATEGORIES: raise ValueError(f"Unknown category '{category}' for testcase {testcase_dir}") return split, category def _accumulate_weighted( sum_acc: dict[str, float], weight_acc: dict[str, float], metric_dict:
dict[str, Any], weight: float, ) -> None: for metric_name, value in metric_dict.items(): if isinstance(value, (int, float)): sum_acc[metric_name] = sum_acc.get(metric_name, 0.0) + float(value) * weight weight_acc[metric_name] = weight_acc.get(metric_name, 0.0) + weight def _to_averages( weighted_sum: dict[str, float], weight: dict[str, float] ) -> dict[str, float]: return { k: v / weight[k] for k, v in sorted(weighted_sum.items()) if weight.get(k, 0.0) > 0 } def _load_result_row( result_path: Path, ) -> tuple[float, dict[str, Any], dict[str, Any], dict[str, Any]]: payload = json.loads(result_path.read_text(encoding="utf-8")) num_motions = float(payload.get("num_motions", 1)) per_motion_mean_gen = payload.get("per_motion_mean_gen") or payload.get("per_motion_mean", {}) per_motion_mean_gt = payload.get("per_motion_mean_gt") or {} tmr = payload.get("tmr") or {} if not isinstance(per_motion_mean_gen, dict): raise ValueError(f"'per_motion_mean_gen' / 'per_motion_mean' is not a dict in {result_path}") if not isinstance(per_motion_mean_gt, dict): raise ValueError(f"'per_motion_mean_gt' is not a dict in {result_path}") if not isinstance(tmr, dict): raise ValueError(f"'tmr' is not a dict in {result_path}") return num_motions, per_motion_mean_gen, per_motion_mean_gt, tmr # Display labels for table rows (paper-style). TEXT_FOLLOWING_ROW_LABELS = { "overview": "Overview", "timeline_single": "Timeline single", "timeline_multi": "Timeline multi", } CONSTRAINTS_ROW_LABELS = { "constraints_withtext": "Constraints with text", "constraints_notext": "Constraints without text", } # Meters to cm for constraint position metrics. M_TO_CM = 100.0 def _table_value(val: float | None) -> float | str | None: """Return value for JSON table; use None for missing (omit or serialize as null).""" if val is None: return None if isinstance(val, (int, float)) and (val != val or val == float("inf")): # nan or inf return None return val def _build_tables( row_acc: dict[tuple[str, str], dict[str, Any]], ) -> dict[str, dict[str, list[dict[str, Any]]]]: """Build text_following and constraints tables per split for paper-style output.""" tables: dict[str, dict[str, list[dict[str, Any]]]] = {} for split in SPLITS: tables[split] = {"text_following": [], "constraints": []} # Text-following table: Overview, Timeline single, Timeline multi. for category in TEXT_FOLLOWING_CATEGORIES: acc = row_acc[(split, category)] per_motion_gen = _to_averages(acc["per_motion_mean_weighted_sum"], acc["per_motion_mean_weight"]) per_motion_gt = _to_averages(acc["per_motion_mean_gt_weighted_sum"], acc["per_motion_mean_gt_weight"]) tmr_avg = _to_averages(acc["tmr_weighted_sum"], acc["tmr_weight"]) if acc["tmr_weight"] else {} r03_gen = tmr_avg.get("TMR/t2m_R/R03") r03_gt = tmr_avg.get("TMR/t2m_gt_R/R03") fid_gen_text = tmr_avg.get("TMR/FID/gen_text") fid_gt_text = tmr_avg.get("TMR/FID/gt_text") fid_gen_gt = tmr_avg.get("TMR/FID/gen_gt") # Skate is velocity in m/s; convert to cm/s for display. 
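# e.g. a stored 0.042 m/s renders as 4.20 cm/s in the table (illustrative value).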
skate_gen = per_motion_gen.get("foot_skate_from_pred_contacts") skate_gt = per_motion_gt.get("foot_skate_from_pred_contacts") contact_gen = per_motion_gen.get("foot_contact_consistency") contact_gt = per_motion_gt.get("foot_contact_consistency") row_label = TEXT_FOLLOWING_ROW_LABELS[category] tables[split]["text_following"].append( { "row": row_label, "R@3 (gen)": _table_value(r03_gen), "R@3 (GT)": _table_value(r03_gt), "FID gen-text": _table_value(fid_gen_text), "FID GT-text": _table_value(fid_gt_text), "FID gen-GT": _table_value(fid_gen_gt), "Skate (gen, cm/s)": _table_value(skate_gen * 100.0 if skate_gen is not None else None), "Skate (GT, cm/s)": _table_value(skate_gt * 100.0 if skate_gt is not None else None), "Contact (gen)": _table_value(contact_gen), "Contact (GT)": _table_value(contact_gt), } ) # Constraints table: Constraints with text, Constraints without text. for category in CONSTRAINTS_CATEGORIES: acc = row_acc[(split, category)] per_motion_gen = _to_averages(acc["per_motion_mean_weighted_sum"], acc["per_motion_mean_weight"]) per_motion_gt = _to_averages(acc["per_motion_mean_gt_weighted_sum"], acc["per_motion_mean_gt_weight"]) row_label = CONSTRAINTS_ROW_LABELS[category] row_dict: dict[str, Any] = { "row": row_label, "Full-Body Pos (gen, cm)": _table_value( per_motion_gen.get("constraint_fullbody_keyframe") * M_TO_CM if per_motion_gen.get("constraint_fullbody_keyframe") is not None else None ), "Full-Body Pos (GT, cm)": _table_value( per_motion_gt.get("constraint_fullbody_keyframe") * M_TO_CM if per_motion_gt.get("constraint_fullbody_keyframe") is not None else None ), "End-Effector Pos (gen, cm)": _table_value( per_motion_gen.get("constraint_end_effector") * M_TO_CM if per_motion_gen.get("constraint_end_effector") is not None else None ), "End-Effector Pos (GT, cm)": _table_value( per_motion_gt.get("constraint_end_effector") * M_TO_CM if per_motion_gt.get("constraint_end_effector") is not None else None ), "End-Effector Rot (deg)": None, # Not implemented in metrics. 
"2D Root Pos (gen, cm)": _table_value( per_motion_gen.get("constraint_root2d_err") * M_TO_CM if per_motion_gen.get("constraint_root2d_err") is not None else None ), "2D Root Pos (GT, cm)": _table_value( per_motion_gt.get("constraint_root2d_err") * M_TO_CM if per_motion_gt.get("constraint_root2d_err") is not None else None ), "2D Pelvis Pos@95% (gen, cm)": _table_value( per_motion_gen.get("constraint_root2d_err_p95") * M_TO_CM if per_motion_gen.get("constraint_root2d_err_p95") is not None else None ), "2D Pelvis Pos@95% (GT, cm)": _table_value( per_motion_gt.get("constraint_root2d_err_p95") * M_TO_CM if per_motion_gt.get("constraint_root2d_err_p95") is not None else None ), } tables[split]["constraints"].append(row_dict) return tables def _fmt_md(val: float | None, decimals: int) -> str: """Format a numeric value for a markdown cell, or '-' for None/NaN.""" if val is None: return "-" if isinstance(val, float) and (val != val or val == float("inf")): return "-" return f"{val:.{decimals}f}" def _print_tf_formatted_md( splits_data: list[tuple[str, list[dict[str, Any]]]], title: str, ) -> None: """Print text-following table in markdown, mirroring the terminal layout.""" groups = ["Overview", "Timeline single", "Timeline multi"] specs: list[tuple[str, int]] = [ ("R@3\u2191", 2), ("FID\u2193", 3), ("Skate\u2193", 3), ("Contact\u2191", 3), ] gt_keys = ["R@3 (GT)", None, "Skate (GT, cm/s)", "Contact (GT)"] gen_keys = ["R@3 (gen)", "FID gen-GT", "Skate (gen, cm/s)", "Contact (gen)"] gt_defaults: list[float | None] = [None, 0.0, None, None] headers = [""] for g in groups: for hdr, _ in specs: headers.append(f"{g} {hdr}") print(f"\n### {title}\n") print("| " + " | ".join(headers) + " |") print("| " + " | ".join("---" for _ in headers) + " |") for split_label, rows in splits_data: for row_type, keys, defaults in [ ("Ground Truth", gt_keys, gt_defaults), ("Method", gen_keys, [None] * len(specs)), ]: cells = [f"**{split_label}** {row_type}"] for row in rows: for j, (_, dec) in enumerate(specs): key = keys[j] val = defaults[j] if key is None else row.get(key) cells.append(_fmt_md(val, dec)) print("| " + " | ".join(cells) + " |") print() def _print_c_formatted_md( splits_data: list[tuple[str, list[dict[str, Any]]]], title: str, ) -> None: """Print constraints table in markdown, mirroring the terminal layout.""" groups = ["With text", "Without text"] specs: list[tuple[str, int]] = [ ("FB Pos\u2193", 3), ("EE Pos\u2193", 3), ("EE Rot\u2193", 3), ("2D Root\u2193", 3), ("Pelvis@95%", 2), ] gt_keys = [ "Full-Body Pos (GT, cm)", "End-Effector Pos (GT, cm)", "End-Effector Rot (deg)", "2D Root Pos (GT, cm)", "2D Pelvis Pos@95% (GT, cm)", ] gen_keys = [ "Full-Body Pos (gen, cm)", "End-Effector Pos (gen, cm)", "End-Effector Rot (deg)", "2D Root Pos (gen, cm)", "2D Pelvis Pos@95% (gen, cm)", ] headers = [""] for g in groups: for hdr, _ in specs: headers.append(f"{g} {hdr}") print(f"\n### {title}\n") print("| " + " | ".join(headers) + " |") print("| " + " | ".join("---" for _ in headers) + " |") for split_label, rows in splits_data: for row_type, keys in [("Ground Truth", gt_keys), ("Method", gen_keys)]: cells = [f"**{split_label}** {row_type}"] for row in rows: for j, (_, dec) in enumerate(specs): cells.append(_fmt_md(row.get(keys[j]), dec)) print("| " + " | ".join(cells) + " |") print() def _print_formatted_gt_method_md( tables: dict[str, dict[str, list[dict[str, Any]]]], ) -> None: """Print combined tables in markdown format, mirroring the terminal layout.""" tf_splits: list[tuple[str, list[dict[str, Any]]]] = [] 
c_splits: list[tuple[str, list[dict[str, Any]]]] = [] for split in SPLITS: split_tables = tables.get(split, {}) tf_rows = split_tables.get("text_following", []) c_rows = split_tables.get("constraints", []) if tf_rows and len(tf_rows) == 3: tf_splits.append((split.capitalize(), tf_rows)) if c_rows and len(c_rows) == 2: c_splits.append((split.capitalize(), c_rows)) if tf_splits: _print_tf_formatted_md(tf_splits, "Text-Following Evaluation") if c_splits: _print_c_formatted_md(c_splits, "Constrained Evaluation") def _fmt(val: float | None, decimals: int, width: int) -> str: """Format a numeric value right-aligned to *width*, or '-' for None.""" if val is None: return f"{'-':>{width}}" return f"{val:>{width}.{decimals}f}" def _print_grouped_rows( label: str, rows: list[dict[str, Any]], specs: list[tuple[str, int, int]], keys: list[str], mw: int, sep: str, ) -> None: """Print one data row across all column groups.""" parts = [f"{label:<{mw}}"] for i, row in enumerate(rows): if i: parts.append(sep) for j, (_, dec, w) in enumerate(specs): parts.append(_fmt(row.get(keys[j]), dec, w)) print("".join(parts)) def _print_tf_formatted( splits_data: list[tuple[str, list[dict[str, Any]]]], title: str, ) -> None: """Print text-following table with Overview / Timeline single / Timeline multi groups. *splits_data* is a list of ``(split_label, category_rows)`` tuples so that content and repetition splits appear as separate row-pairs inside one table. """ groups = ["Overview", "Timeline single", "Timeline multi"] specs: list[tuple[str, int, int]] = [ ("R@3\u2191", 2, 7), ("FID\u2193", 3, 7), ("Skate\u2193", 3, 9), ("Contact\u2191", 3, 10), ] gt_keys = ["R@3 (GT)", None, "Skate (GT, cm/s)", "Contact (GT)"] gen_keys = ["R@3 (gen)", "FID gen-GT", "Skate (gen, cm/s)", "Contact (gen)"] gt_defaults: list[float | None] = [None, 0.0, None, None] mw = 16 gw = sum(s[2] for s in specs) sep = " | " total_w = mw + len(groups) * gw + (len(groups) - 1) * len(sep) print(f"\n{title:^{total_w}}") print("=" * total_w) parts: list[str] = [" " * mw] for i, g in enumerate(groups): if i: parts.append(sep) parts.append(g.center(gw)) print("".join(parts)) parts = [f"{'':<{mw}}"] for i in range(len(groups)): if i: parts.append(sep) for hdr, _, w in specs: parts.append(f"{hdr:>{w}}") print("".join(parts)) parts = ["\u2500" * mw] for i in range(len(groups)): if i: parts.append("\u2500\u253c\u2500") parts.append("\u2500" * gw) print("".join(parts)) for si, (split_label, rows) in enumerate(splits_data): tag = f"\u2500\u2500 {split_label} " print(tag + "\u2500" * (total_w - len(tag))) parts = [f"{'Ground Truth':<{mw}}"] for i, row in enumerate(rows): if i: parts.append(sep) for j, (_, dec, w) in enumerate(specs): key = gt_keys[j] val = gt_defaults[j] if key is None else row.get(key) parts.append(_fmt(val, dec, w)) print("".join(parts)) _print_grouped_rows("Method", rows, specs, gen_keys, mw, sep) print() def _print_c_formatted( splits_data: list[tuple[str, list[dict[str, Any]]]], title: str, ) -> None: """Print constraints table with With text / Without text groups. *splits_data* is a list of ``(split_label, category_rows)`` tuples. 
""" groups = ["With text", "Without text"] specs: list[tuple[str, int, int]] = [ ("FB Pos\u2193", 3, 10), ("EE Pos\u2193", 3, 10), ("EE Rot\u2193", 3, 10), ("2D Root\u2193", 3, 11), ("Pelvis@95%", 2, 12), ] gt_keys = [ "Full-Body Pos (GT, cm)", "End-Effector Pos (GT, cm)", "End-Effector Rot (deg)", "2D Root Pos (GT, cm)", "2D Pelvis Pos@95% (GT, cm)", ] gen_keys = [ "Full-Body Pos (gen, cm)", "End-Effector Pos (gen, cm)", "End-Effector Rot (deg)", "2D Root Pos (gen, cm)", "2D Pelvis Pos@95% (gen, cm)", ] mw = 16 gw = sum(s[2] for s in specs) sep = " | " total_w = mw + len(groups) * gw + (len(groups) - 1) * len(sep) print(f"\n{title:^{total_w}}") print("=" * total_w) parts: list[str] = [" " * mw] for i, g in enumerate(groups): if i: parts.append(sep) parts.append(g.center(gw)) print("".join(parts)) parts = [f"{'':<{mw}}"] for i in range(len(groups)): if i: parts.append(sep) for hdr, _, w in specs: parts.append(f"{hdr:>{w}}") print("".join(parts)) parts = ["\u2500" * mw] for i in range(len(groups)): if i: parts.append("\u2500\u253c\u2500") parts.append("\u2500" * gw) print("".join(parts)) for si, (split_label, rows) in enumerate(splits_data): tag = f"\u2500\u2500 {split_label} " print(tag + "\u2500" * (total_w - len(tag))) _print_grouped_rows("Ground Truth", rows, specs, gt_keys, mw, sep) _print_grouped_rows("Method", rows, specs, gen_keys, mw, sep) print() def _print_formatted_gt_method( tables: dict[str, dict[str, list[dict[str, Any]]]], ) -> None: """Print combined tables with column groups separated by vertical bars. Content and repetition splits are shown as separate row-pairs inside one text-following table and one constraints table. """ tf_splits: list[tuple[str, list[dict[str, Any]]]] = [] c_splits: list[tuple[str, list[dict[str, Any]]]] = [] for split in SPLITS: split_tables = tables.get(split, {}) tf_rows = split_tables.get("text_following", []) c_rows = split_tables.get("constraints", []) if tf_rows and len(tf_rows) == 3: tf_splits.append((split.capitalize(), tf_rows)) if c_rows and len(c_rows) == 2: c_splits.append((split.capitalize(), c_rows)) if tf_splits: _print_tf_formatted(tf_splits, "Text-Following Evaluation") if c_splits: _print_c_formatted(c_splits, "Constrained Evaluation") def _build_summary(root: Path) -> dict[str, Any]: testcase_dirs = _discover_testcase_dirs(root) if not testcase_dirs: raise SystemExit( f"No testcase folders found under {root} (expected folders containing meta.json + motion.npz + gt_motion.npz samples)." 
) missing_results: list[Path] = [] for testcase_dir in testcase_dirs: result_path = _expected_result_path(testcase_dir) if not result_path.is_file(): missing_results.append(result_path) if missing_results: missing_text = "\n".join(str(path) for path in missing_results) raise SystemExit(f"Missing {len(missing_results)} testcase result JSON files:\n{missing_text}") row_acc: dict[tuple[str, str], dict[str, Any]] = {} for split in SPLITS: for category in ROW_CATEGORIES: row_acc[(split, category)] = { "num_testcases": 0, "num_motions": 0.0, "per_motion_mean_weighted_sum": {}, "per_motion_mean_weight": {}, "per_motion_mean_gt_weighted_sum": {}, "per_motion_mean_gt_weight": {}, "tmr_weighted_sum": {}, "tmr_weight": {}, } for testcase_dir in testcase_dirs: split, category = _parse_testcase_key(root, testcase_dir) result_path = _expected_result_path(testcase_dir) num_motions, per_motion_mean_gen, per_motion_mean_gt, tmr = _load_result_row(result_path) acc = row_acc[(split, category)] acc["num_testcases"] += 1 acc["num_motions"] += num_motions _accumulate_weighted( acc["per_motion_mean_weighted_sum"], acc["per_motion_mean_weight"], per_motion_mean_gen, num_motions, ) if per_motion_mean_gt: _accumulate_weighted( acc["per_motion_mean_gt_weighted_sum"], acc["per_motion_mean_gt_weight"], per_motion_mean_gt, num_motions, ) if tmr: _accumulate_weighted( acc["tmr_weighted_sum"], acc["tmr_weight"], tmr, num_motions, ) rows: list[dict[str, Any]] = [] for split in SPLITS: for category in ROW_CATEGORIES: acc = row_acc[(split, category)] tmr_avg = _to_averages(acc["tmr_weighted_sum"], acc["tmr_weight"]) if acc["tmr_weight"] else {} per_motion_gt_avg = _to_averages(acc["per_motion_mean_gt_weighted_sum"], acc["per_motion_mean_gt_weight"]) row_dict: dict[str, Any] = { "split": split, "category": category, "num_testcases": acc["num_testcases"], "num_motions": int(acc["num_motions"]), "per_motion_mean": _to_averages(acc["per_motion_mean_weighted_sum"], acc["per_motion_mean_weight"]), "tmr": tmr_avg, } if per_motion_gt_avg: row_dict["per_motion_mean_gt"] = per_motion_gt_avg rows.append(row_dict) # Combined constraints row for this split. 
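# Motion-weighted merge; e.g. (illustrative) an error of 0.02 over 10 motions (with text) and 0.04 over 30 motions (no text) combine to (0.02 * 10 + 0.04 * 30) / 40 = 0.035.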
withtext = row_acc[(split, "constraints_withtext")] notext = row_acc[(split, "constraints_notext")] combined_per_motion = defaultdict(float) combined_per_motion_weight = defaultdict(float) combined_per_motion_gt = defaultdict(float) combined_per_motion_gt_weight = defaultdict(float) combined_tmr = defaultdict(float) combined_tmr_weight = defaultdict(float) for sum_key, weight_key, sum_acc, weight_acc in ( ("per_motion_mean_weighted_sum", "per_motion_mean_weight", combined_per_motion, combined_per_motion_weight), ("per_motion_mean_gt_weighted_sum", "per_motion_mean_gt_weight", combined_per_motion_gt, combined_per_motion_gt_weight), ("tmr_weighted_sum", "tmr_weight", combined_tmr, combined_tmr_weight), ): for src in (withtext, notext): for k, v in src[sum_key].items(): sum_acc[k] += v for k, w in src[weight_key].items(): weight_acc[k] += w combined_tmr_avg = _to_averages(dict(combined_tmr), dict(combined_tmr_weight)) if combined_tmr_weight else {} combined_gt_avg = _to_averages(dict(combined_per_motion_gt), dict(combined_per_motion_gt_weight)) combined_row: dict[str, Any] = { "split": split, "category": "constraints", "num_testcases": withtext["num_testcases"] + notext["num_testcases"], "num_motions": int(withtext["num_motions"] + notext["num_motions"]), "per_motion_mean": _to_averages(dict(combined_per_motion), dict(combined_per_motion_weight)), "tmr": combined_tmr_avg, } if combined_gt_avg: combined_row["per_motion_mean_gt"] = combined_gt_avg rows.append(combined_row) tables = _build_tables(row_acc) return { "folder": str(root), "num_testcases": len(testcase_dirs), "rows": rows, "tables": tables, } def main() -> None: parser = argparse.ArgumentParser( description=("Validate testcase <testcase>.json result files and aggregate averages by split/category.") ) parser.add_argument( "folder", type=Path, help="Testsuite root folder (contains content/ and repetition/).", ) parser.add_argument( "--output", type=Path, default=None, help="Optional output JSON path. Default: <folder>/summary_rows.json", ) parser.add_argument( "--format", choices=["terminal", "md"], default="terminal", dest="table_format", help="Table output format: 'terminal' (default) for fixed-width tables, 'md' for markdown.", ) args = parser.parse_args() folder = args.folder.resolve() if not folder.is_dir(): raise SystemExit(f"Folder does not exist: {folder}") summary = _build_summary(folder) out_path = args.output.resolve() if args.output else folder / "summary_rows.json" out_path.write_text(json.dumps(summary, indent=2) + "\n", encoding="utf-8") print(f"Wrote aggregated summary: {out_path}") print(f"Rows: {len(summary['rows'])}, testcases: {summary['num_testcases']}") if args.table_format == "md": _print_formatted_gt_method_md(summary["tables"]) else: _print_formatted_gt_method(summary["tables"]) if __name__ == "__main__": main() ================================================ FILE: docker-compose.yaml ================================================ services: text-encoder: build: context: . dockerfile: Dockerfile image: kimodo:1.0 container_name: text-encoder working_dir: /workspace command: python -m kimodo.scripts.run_text_encoder_server volumes: - ./:/workspace # Cache HF downloads in host "system-wide" Hugging Face cache. - ${HOME}/.cache/huggingface:/workspace/.cache/huggingface # Mount the host HF auth token at the standard cache location in-container.
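# If you have not logged in yet, running huggingface-cli login on the host typically creates ~/.cache/huggingface/token; the :ro flag below keeps containers from modifying it.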
- ${HOME}/.cache/huggingface/token:/workspace/.cache/huggingface/token:ro # expose to your host browser ports: - "9550:9550" environment: # Make Gradio reachable from other containers # - GRADIO_SERVER_NAME=0.0.0.0 # - GRADIO_SERVER_PORT=9550 - HF_HOME=/workspace/.cache/huggingface # Host user mapping (for non-root ownership + proper shell prompt) - HOST_USER=${USER:-user} # GPU - NVIDIA_VISIBLE_DEVICES=all - NVIDIA_DRIVER_CAPABILITIES=compute,utility shm_size: "16gb" ipc: host # Wait until Gradio responds on HTTP healthcheck: test: ["CMD", "bash", "-lc", "curl -fsS http://localhost:9550/ > /dev/null"] interval: 3s timeout: 2s retries: 40 deploy: resources: reservations: devices: - driver: nvidia count: all capabilities: [gpu] demo: build: context: . dockerfile: Dockerfile image: kimodo:1.0 container_name: demo working_dir: /workspace command: python -m kimodo.demo volumes: - ./:/workspace - ${HOME}/.cache/huggingface:/workspace/.cache/huggingface - ${HOME}/.cache/huggingface/token:/workspace/.cache/huggingface/token:ro # Explicit checkpoint mount (avoids surprises if the repo bind mount isn't what you expect). - ./checkpoints:/workspace/checkpoints:ro ports: - "${SERVER_PORT:-7860}:${SERVER_PORT:-7860}" environment: # Point the model at the text-encoder service. - TEXT_ENCODER_URL=http://text-encoder:9550/ # Make checkpoint paths robust (Hydra config reads this). - SERVER_PORT=${SERVER_PORT:-7860} - HF_HOME=/workspace/.cache/huggingface # Host user mapping (for non-root ownership + proper shell prompt) - HOST_USER=${USER:-user} # GPU - NVIDIA_VISIBLE_DEVICES=all - NVIDIA_DRIVER_CAPABILITIES=compute,utility shm_size: "16gb" ipc: host depends_on: text-encoder: condition: service_healthy deploy: resources: reservations: devices: - driver: nvidia count: all capabilities: [gpu] ================================================ FILE: docker_requirements.in ================================================ # # Human-maintained direct dependencies (top-level). # Use `uv` to compile this into a fully pinned `requirements.txt` lockfile. # # IMPORTANT: # - We intentionally do NOT list `torch` here because the Docker image base # (`nvcr.io/nvidia/pytorch`) already provides it. Installing torch via pip # during image build is slow and can lead to ABI/CUDA mismatches. # - If you are NOT using Docker, install an appropriate PyTorch build separately. # # Config / wiring hydra-core>=1.3 omegaconf>=2.3 # Core numerics numpy>=1.23,<2 scipy>=1.10,<2 # Model / embeddings # NOTE: `kimodo/model/llm2vec` has only been tested with transformers==5.1.0 transformers==5.1.0 urllib3>=2.6.3 boto3 peft>=0.12 einops>=0.7 # Misc tqdm>=4.0 packaging>=21.0 pydantic>=2.0 # UI / client filelock>=3.20.3 gradio>=6.8.0 gradio_client>=1.0 # Visualization trimesh>=3.21.7 scenepic>=1.1.0 pillow>=9.0 av>=16.1.0 py-soma-x @ git+https://github.com/NVlabs/SOMA-X.git # Local packages (editable installs for viser and kimodo; MotionCorrection non-editable) ./MotionCorrection -e . -e ./kimodo-viser ================================================ FILE: docker_requirements.txt ================================================ # This file was autogenerated by uv via the following command: # NOTE: `torch` (and its CUDA wheels) are intentionally omitted from this lockfile. # The Docker base image (nvcr.io/nvidia/pytorch) already provides a tested PyTorch build. # # uv pip compile docker_requirements.in -o docker_requirements.txt --python-version 3.10 --python-platform x86_64-manylinux2014 -e .
# via -r docker_requirements.in -e ./kimodo-viser # via -r docker_requirements.in py-soma-x @ git+https://github.com/NVlabs/SOMA-X.git # via -r docker_requirements.in accelerate==1.13.0 # via peft aiofiles==24.1.0 # via gradio annotated-doc==0.0.4 # via # fastapi # typer annotated-types==0.7.0 # via pydantic antlr4-python3-runtime==4.9.3 # via # hydra-core # omegaconf anyio==4.12.1 # via # gradio # httpx # starlette attrs==25.4.0 # via # jsonschema # referencing av==16.1.0 # via # -r docker_requirements.in # kimodo boto3==1.42.66 # via # -r docker_requirements.in # kimodo botocore==1.42.66 # via # boto3 # s3transfer brotli==1.2.0 # via gradio certifi==2026.2.25 # via # httpcore # httpx # requests charset-normalizer==3.4.5 # via # requests # trimesh click==8.3.1 # via # typer # uvicorn colorlog==6.10.1 # via trimesh einops==0.8.2 # via # -r docker_requirements.in # kimodo embreex==2.17.7.post7 # via trimesh exceptiongroup==1.3.1 # via anyio fastapi==0.135.1 # via gradio ffmpy==1.0.0 # via gradio filelock==3.25.2 # via # -r docker_requirements.in # huggingface-hub # kimodo # torch fsspec==2026.2.0 # via # gradio-client # huggingface-hub # torch gradio==6.9.0 # via # -r docker_requirements.in # kimodo gradio-client==2.3.0 # via # -r docker_requirements.in # gradio # kimodo groovy==0.1.2 # via gradio h11==0.16.0 # via # httpcore # uvicorn hf-xet==1.4.0 # via huggingface-hub httpcore==1.0.9 # via httpx httpx==0.28.1 # via # gradio # gradio-client # huggingface-hub # safehttpx # trimesh huggingface-hub==1.6.0 # via # accelerate # gradio # gradio-client # peft # tokenizers # transformers hydra-core==1.3.2 # via # -r docker_requirements.in # kimodo idna==3.11 # via # anyio # httpx # requests imageio==2.37.3 # via viser jinja2==3.1.6 # via # gradio # torch jmespath==1.1.0 # via # boto3 # botocore jsonschema==4.26.0 # via trimesh jsonschema-specifications==2025.9.1 # via jsonschema lxml==6.0.2 # via # trimesh # yourdfpy manifold3d==3.4.0 # via trimesh mapbox-earcut==2.0.0 # via trimesh markdown-it-py==4.0.0 # via rich markupsafe==3.0.3 # via # gradio # jinja2 mdurl==0.1.2 # via markdown-it-py ./MotionCorrection # via -r docker_requirements.in msgspec==0.20.0 # via viser nodeenv==1.10.0 # via viser numpy==1.26.4 # via # -r docker_requirements.in # accelerate # embreex # gradio # imageio # kimodo # manifold3d # mapbox-earcut # motion-correction # pandas # peft # pycollada # scenepic # scipy # shapely # transformers # trimesh # vhacdx # viser # yourdfpy omegaconf==2.3.0 # via # -r docker_requirements.in # hydra-core # kimodo orjson==3.11.7 # via gradio packaging==26.0 # via # -r docker_requirements.in # accelerate # gradio # gradio-client # huggingface-hub # hydra-core # kimodo # peft # transformers pandas==2.3.3 # via gradio peft==0.18.1 # via # -r docker_requirements.in # kimodo pillow==12.1.1 # via # -r docker_requirements.in # gradio # imageio # kimodo # scenepic # trimesh psutil==7.2.2 # via # accelerate # peft pycollada==0.9.3 # via trimesh pydantic==2.12.5 # via # -r docker_requirements.in # fastapi # gradio # kimodo pydantic-core==2.41.5 # via pydantic pydub==0.25.1 # via gradio pygments==2.19.2 # via rich python-dateutil==2.9.0.post0 # via # botocore # pandas # pycollada python-multipart==0.0.22 # via gradio pytz==2026.1.post1 # via # gradio # pandas pyyaml==6.0.3 # via # accelerate # gradio # huggingface-hub # omegaconf # peft # transformers referencing==0.37.0 # via # jsonschema # jsonschema-specifications regex==2026.2.28 # via transformers requests==2.32.5 # via viser rich==14.3.3 # via # 
typer # viser rpds-py==0.30.0 # via # jsonschema # referencing rtree==1.4.1 # via trimesh s3transfer==0.16.0 # via boto3 safehttpx==0.1.7 # via gradio safetensors==0.7.0 # via # accelerate # peft # transformers scenepic==1.1.2 # via # -r docker_requirements.in # kimodo scipy==1.15.3 # via # -r docker_requirements.in # kimodo # scenepic # trimesh semantic-version==2.10.0 # via gradio shapely==2.1.2 # via trimesh shellingham==1.5.4 # via typer six==1.17.0 # via # python-dateutil # yourdfpy starlette==0.52.1 # via # fastapi # gradio svg-path==7.0 # via trimesh tokenizers==0.22.2 # via transformers tomlkit==0.13.3 # via gradio tqdm==4.67.3 # via # -r docker_requirements.in # huggingface-hub # kimodo # peft # transformers # viser transformers==5.1.0 # via # -r docker_requirements.in # kimodo # peft trimesh==4.11.3 # via # -r docker_requirements.in # kimodo # viser # yourdfpy typer==0.24.1 # via # gradio # huggingface-hub # typer-slim typer-slim==0.24.0 # via transformers typing-extensions==4.15.0 # via # anyio # exceptiongroup # fastapi # gradio # gradio-client # huggingface-hub # pydantic # pydantic-core # referencing # starlette # torch # typing-inspection # uvicorn # viser typing-inspection==0.4.2 # via # fastapi # pydantic tzdata==2025.3 # via pandas urllib3==2.6.3 # via # -r docker_requirements.in # botocore # kimodo # requests uvicorn==0.41.0 # via gradio vhacdx==0.0.10 # via trimesh websockets==15.0.1 # via viser xxhash==3.6.0 # via trimesh yourdfpy==0.0.60 # via viser ================================================ FILE: docs/.gitattributes ================================================ source/_static/quick_tour.mp4 filter=lfs diff=lfs merge=lfs -text ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = source BUILDDIR = build .PHONY: help Makefile apidoc # Catch-all target: route all unknown targets to Sphinx %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) apidoc: @$(SPHINXBUILD) -b html -q "$(SOURCEDIR)" "$(BUILDDIR)" >/dev/null 2>&1 || true @sphinx-apidoc -o "$(SOURCEDIR)/api_reference/_generated" -t "$(SOURCEDIR)/_templates/apidoc" ../kimodo ../kimodo/**/tests* ../kimodo/**/test* -f ================================================ FILE: docs/README.md ================================================ # Documentation ## Local build Install doc dependencies: ```bash pip install -r docs/requirements.txt ``` Build HTML: ```bash cd docs make html ``` Open the output at `docs/build/html/index.html`. ## API reference generation Generate API stubs from the Python packages: ```bash cd docs make apidoc make html ``` Note: generated stubs are written to `docs/source/api_reference/_generated` and are not included in the default navigation. Add them to a toctree if you want to expose them. 
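To preview the built site, Python's built-in web server is sufficient (a convenience suggestion, not part of the repo's tooling; the port is arbitrary):

```bash
python -m http.server 8000 --directory docs/build/html
```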
================================================ FILE: docs/make.bat ================================================ @ECHO OFF pushd %~dp0 set SPHINXOPTS= set SPHINXBUILD=sphinx-build set SOURCEDIR=source set BUILDDIR=build if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd ================================================ FILE: docs/requirements.txt ================================================ sphinx>=7.0,<9.0 nvidia-sphinx-theme sphinx-copybutton myst-parser sphinx-design ================================================ FILE: docs/source/_static/custom.css ================================================ .hero { padding: 2.5rem 2rem; border-radius: 12px; background: linear-gradient(135deg, #0f1a0c 0%, #1c2b16 55%, #76b900 100%); color: #f8f9fb; margin: 1.5rem 0 2rem 0; } .hero-title { font-size: 2.2rem; margin: 0 0 0.6rem 0; } .hero-subtitle { font-size: 1.1rem; margin: 0 0 1.2rem 0; opacity: 0.9; } .hero-actions a { display: inline-block; margin-right: 0.8rem; padding: 0.5rem 0.9rem; border-radius: 6px; background: #76b900; color: #0f1a0c; text-decoration: none; font-weight: 600; } .hero-actions a.secondary { background: transparent; color: #f8f9fb; border: 1px solid #f8f9fb; } .card-grid { display: grid; gap: 1rem; grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); margin: 1.5rem 0 2rem 0; } .card { border: 1px solid rgba(0, 0, 0, 0.08); border-radius: 10px; padding: 1rem 1.2rem; background: #ffffff; } .card h3 { margin-top: 0; margin-bottom: 0.4rem; } .card p { margin: 0; color: #3c4758; } .quick-links { display: flex; flex-wrap: wrap; gap: 0.8rem; margin: 1rem 0 2rem 0; } .quick-links a { display: inline-block; padding: 0.4rem 0.8rem; border-radius: 999px; background: #edf2f7; color: #1a202c; text-decoration: none; font-weight: 600; } ================================================ FILE: docs/source/_templates/apidoc/module.rst.jinja ================================================ {%- if show_headings %} {{- [basename, "module"] | join(' ') | e | heading }} {% endif -%} .. automodule:: {{ qualname }} {%- set preferred_order = ['members', 'undoc-members', 'show-inheritance'] %} {%- for option in preferred_order %} {%- if option in automodule_options %} :{{ option }}: {%- endif %} {%- endfor %} {%- for option in automodule_options %} {%- if option not in preferred_order %} :{{ option }}: {%- endif %} {%- endfor %} ================================================ FILE: docs/source/_templates/apidoc/package.rst.jinja ================================================ {%- set preferred_order = ['members', 'undoc-members', 'show-inheritance'] %} {%- macro automodule(modname, options) -%} .. automodule:: {{ modname }} {%- for option in preferred_order %} {%- if option in options %} :{{ option }}: {%- endif %} {%- endfor %} {%- for option in options %} {%- if option not in preferred_order %} :{{ option }}: {%- endif %} {%- endfor %} {%- endmacro %} {%- macro toctree(docnames) -%} .. toctree:: :maxdepth: {{ maxdepth }} {% for docname in docnames %} {{ docname }} {%- endfor %} {%- endmacro %} {%- if is_namespace %} {{- [pkgname, "namespace"] | join(" ") | e | heading }} {% else %} {{- [pkgname, "package"] | join(" ") | e | heading }} {% endif %} {%- if is_namespace %} .. 
py:module:: {{ pkgname }} {% endif %} {%- if modulefirst and not is_namespace %} {{ automodule(pkgname, automodule_options) }} {% endif %} {%- if subpackages %} Subpackages ----------- {{ toctree(subpackages) }} {% endif %} {%- if submodules %} Submodules ---------- {% if separatemodules %} {{ toctree(submodules) }} {% else %} {%- for submodule in submodules %} {% if show_headings %} {{- [submodule, "module"] | join(" ") | e | heading(2) }} {% endif %} {{ automodule(submodule, automodule_options) }} {% endfor %} {%- endif %} {%- endif %} {%- if not modulefirst and not is_namespace %} Module contents --------------- {{ automodule(pkgname, automodule_options) }} {% endif %} ================================================ FILE: docs/source/api_reference/_generated/kimodo.demo.rst ================================================ kimodo.demo package =================== Submodules ---------- kimodo.demo.app module ---------------------- .. automodule:: kimodo.demo.app :members: :undoc-members: :show-inheritance: kimodo.demo.config module ------------------------- .. automodule:: kimodo.demo.config :members: :undoc-members: :show-inheritance: kimodo.demo.embedding\_cache module ----------------------------------- .. automodule:: kimodo.demo.embedding_cache :members: :undoc-members: :show-inheritance: kimodo.demo.generation module ----------------------------- .. automodule:: kimodo.demo.generation :members: :undoc-members: :show-inheritance: kimodo.demo.queue\_manager module --------------------------------- .. automodule:: kimodo.demo.queue_manager :members: :undoc-members: :show-inheritance: kimodo.demo.state module ------------------------ .. automodule:: kimodo.demo.state :members: :undoc-members: :show-inheritance: kimodo.demo.ui module --------------------- .. automodule:: kimodo.demo.ui :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.demo :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.exports.rst ================================================ kimodo.exports package ====================== Submodules ---------- kimodo.exports.bvh module ------------------------- .. automodule:: kimodo.exports.bvh :members: :undoc-members: :show-inheritance: kimodo.exports.motion\_convert\_lib module ------------------------------------------ .. automodule:: kimodo.exports.motion_convert_lib :members: :undoc-members: :show-inheritance: kimodo.exports.motion\_formats module ------------------------------------- .. automodule:: kimodo.exports.motion_formats :members: :undoc-members: :show-inheritance: kimodo.exports.motion\_io module -------------------------------- .. automodule:: kimodo.exports.motion_io :members: :undoc-members: :show-inheritance: kimodo.exports.mujoco module ---------------------------- .. automodule:: kimodo.exports.mujoco :members: :undoc-members: :show-inheritance: kimodo.exports.smplx module --------------------------- .. automodule:: kimodo.exports.smplx :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.exports :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.metrics.rst ================================================ kimodo.metrics package ====================== Submodules ---------- kimodo.metrics.base module -------------------------- .. 
automodule:: kimodo.metrics.base :members: :undoc-members: :show-inheritance: kimodo.metrics.constraints module --------------------------------- .. automodule:: kimodo.metrics.constraints :members: :undoc-members: :show-inheritance: kimodo.metrics.foot\_skate module --------------------------------- .. automodule:: kimodo.metrics.foot_skate :members: :undoc-members: :show-inheritance: kimodo.metrics.tmr module ------------------------- .. automodule:: kimodo.metrics.tmr :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.metrics :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.model.llm2vec.models.rst ================================================ kimodo.model.llm2vec.models package =================================== Submodules ---------- kimodo.model.llm2vec.models.attn\_mask\_utils module ---------------------------------------------------- .. automodule:: kimodo.model.llm2vec.models.attn_mask_utils :members: :undoc-members: :show-inheritance: kimodo.model.llm2vec.models.bidirectional\_llama module ------------------------------------------------------- .. automodule:: kimodo.model.llm2vec.models.bidirectional_llama :members: :undoc-members: :show-inheritance: kimodo.model.llm2vec.models.utils module ---------------------------------------- .. automodule:: kimodo.model.llm2vec.models.utils :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.model.llm2vec.models :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.model.llm2vec.rst ================================================ kimodo.model.llm2vec package ============================ Subpackages ----------- .. toctree:: :maxdepth: 4 kimodo.model.llm2vec.models Submodules ---------- kimodo.model.llm2vec.llm2vec module ----------------------------------- .. automodule:: kimodo.model.llm2vec.llm2vec :members: :undoc-members: :show-inheritance: kimodo.model.llm2vec.llm2vec\_wrapper module -------------------------------------------- .. automodule:: kimodo.model.llm2vec.llm2vec_wrapper :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.model.llm2vec :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.model.rst ================================================ kimodo.model package ==================== Subpackages ----------- .. toctree:: :maxdepth: 4 kimodo.model.llm2vec Submodules ---------- kimodo.model.backbone module ---------------------------- .. automodule:: kimodo.model.backbone :members: :undoc-members: :show-inheritance: kimodo.model.cfg module ----------------------- .. automodule:: kimodo.model.cfg :members: :undoc-members: :show-inheritance: kimodo.model.common module -------------------------- .. automodule:: kimodo.model.common :members: :undoc-members: :show-inheritance: kimodo.model.diffusion module ----------------------------- .. automodule:: kimodo.model.diffusion :members: :undoc-members: :show-inheritance: kimodo.model.kimodo\_model module --------------------------------- .. automodule:: kimodo.model.kimodo_model :members: :undoc-members: :show-inheritance: kimodo.model.load\_model module ------------------------------- .. 
automodule:: kimodo.model.load_model :members: :undoc-members: :show-inheritance: kimodo.model.loading module --------------------------- .. automodule:: kimodo.model.loading :members: :undoc-members: :show-inheritance: kimodo.model.registry module ---------------------------- .. automodule:: kimodo.model.registry :members: :undoc-members: :show-inheritance: kimodo.model.text\_encoder\_api module -------------------------------------- .. automodule:: kimodo.model.text_encoder_api :members: :undoc-members: :show-inheritance: kimodo.model.tmr module ----------------------- .. automodule:: kimodo.model.tmr :members: :undoc-members: :show-inheritance: kimodo.model.twostage\_denoiser module -------------------------------------- .. automodule:: kimodo.model.twostage_denoiser :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.model :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.motion_rep.reps.rst ================================================ kimodo.motion\_rep.reps package =============================== Submodules ---------- kimodo.motion\_rep.reps.base module ----------------------------------- .. automodule:: kimodo.motion_rep.reps.base :members: :undoc-members: :show-inheritance: kimodo.motion\_rep.reps.kimodo\_motionrep module ------------------------------------------------ .. automodule:: kimodo.motion_rep.reps.kimodo_motionrep :members: :undoc-members: :show-inheritance: kimodo.motion\_rep.reps.tmr\_motionrep module --------------------------------------------- .. automodule:: kimodo.motion_rep.reps.tmr_motionrep :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.motion_rep.reps :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.motion_rep.rst ================================================ kimodo.motion\_rep package ========================== Subpackages ----------- .. toctree:: :maxdepth: 4 kimodo.motion_rep.reps Submodules ---------- kimodo.motion\_rep.conditioning module -------------------------------------- .. automodule:: kimodo.motion_rep.conditioning :members: :undoc-members: :show-inheritance: kimodo.motion\_rep.feature\_utils module ---------------------------------------- .. automodule:: kimodo.motion_rep.feature_utils :members: :undoc-members: :show-inheritance: kimodo.motion\_rep.feet module ------------------------------ .. automodule:: kimodo.motion_rep.feet :members: :undoc-members: :show-inheritance: kimodo.motion\_rep.smooth\_root module -------------------------------------- .. automodule:: kimodo.motion_rep.smooth_root :members: :undoc-members: :show-inheritance: kimodo.motion\_rep.stats module ------------------------------- .. automodule:: kimodo.motion_rep.stats :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.motion_rep :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.rst ================================================ kimodo package ============== Subpackages ----------- .. toctree:: :maxdepth: 4 kimodo.demo kimodo.exports kimodo.metrics kimodo.model kimodo.motion_rep kimodo.scripts kimodo.skeleton kimodo.viz Submodules ---------- kimodo.assets module -------------------- .. 
automodule:: kimodo.assets :members: :undoc-members: :show-inheritance: kimodo.constraints module ------------------------- .. automodule:: kimodo.constraints :members: :undoc-members: :show-inheritance: kimodo.geometry module ---------------------- .. automodule:: kimodo.geometry :members: :undoc-members: :show-inheritance: kimodo.meta module ------------------ .. automodule:: kimodo.meta :members: :undoc-members: :show-inheritance: kimodo.postprocess module ------------------------- .. automodule:: kimodo.postprocess :members: :undoc-members: :show-inheritance: kimodo.sanitize module ---------------------- .. automodule:: kimodo.sanitize :members: :undoc-members: :show-inheritance: kimodo.tools module ------------------- .. automodule:: kimodo.tools :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.scripts.rst ================================================ kimodo.scripts package ====================== Submodules ---------- kimodo.scripts.generate module ------------------------------ .. automodule:: kimodo.scripts.generate :members: :undoc-members: :show-inheritance: kimodo.scripts.gradio\_theme module ----------------------------------- .. automodule:: kimodo.scripts.gradio_theme :members: :undoc-members: :show-inheritance: kimodo.scripts.lock\_requirements module ---------------------------------------- .. automodule:: kimodo.scripts.lock_requirements :members: :undoc-members: :show-inheritance: kimodo.scripts.motion\_convert module ------------------------------------- .. automodule:: kimodo.scripts.motion_convert :members: :undoc-members: :show-inheritance: kimodo.scripts.mujoco\_load module ---------------------------------- .. automodule:: kimodo.scripts.mujoco_load :members: :undoc-members: :show-inheritance: kimodo.scripts.run\_text\_encoder\_server module ------------------------------------------------ .. automodule:: kimodo.scripts.run_text_encoder_server :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.scripts :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.skeleton.rst ================================================ kimodo.skeleton package ======================= Submodules ---------- kimodo.skeleton.base module --------------------------- .. automodule:: kimodo.skeleton.base :members: :undoc-members: :show-inheritance: kimodo.skeleton.bvh module -------------------------- .. automodule:: kimodo.skeleton.bvh :members: :undoc-members: :show-inheritance: kimodo.skeleton.definitions module ---------------------------------- .. automodule:: kimodo.skeleton.definitions :members: :undoc-members: :show-inheritance: kimodo.skeleton.kinematics module --------------------------------- .. automodule:: kimodo.skeleton.kinematics :members: :undoc-members: :show-inheritance: kimodo.skeleton.registry module ------------------------------- .. automodule:: kimodo.skeleton.registry :members: :undoc-members: :show-inheritance: kimodo.skeleton.transforms module --------------------------------- .. automodule:: kimodo.skeleton.transforms :members: :undoc-members: :show-inheritance: Module contents --------------- .. 
automodule:: kimodo.skeleton :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/kimodo.viz.rst ================================================ kimodo.viz package ================== Submodules ---------- kimodo.viz.constraint\_ui module -------------------------------- .. automodule:: kimodo.viz.constraint_ui :members: :undoc-members: :show-inheritance: kimodo.viz.coords module ------------------------ .. automodule:: kimodo.viz.coords :members: :undoc-members: :show-inheritance: kimodo.viz.g1\_rig module ------------------------- .. automodule:: kimodo.viz.g1_rig :members: :undoc-members: :show-inheritance: kimodo.viz.gui module --------------------- .. automodule:: kimodo.viz.gui :members: :undoc-members: :show-inheritance: kimodo.viz.playback module -------------------------- .. automodule:: kimodo.viz.playback :members: :undoc-members: :show-inheritance: kimodo.viz.scene module ----------------------- .. automodule:: kimodo.viz.scene :members: :undoc-members: :show-inheritance: kimodo.viz.smplx\_skin module ----------------------------- .. automodule:: kimodo.viz.smplx_skin :members: :undoc-members: :show-inheritance: kimodo.viz.soma\_layer\_skin module ----------------------------------- .. automodule:: kimodo.viz.soma_layer_skin :members: :undoc-members: :show-inheritance: kimodo.viz.soma\_skin module ---------------------------- .. automodule:: kimodo.viz.soma_skin :members: :undoc-members: :show-inheritance: kimodo.viz.viser\_utils module ------------------------------ .. automodule:: kimodo.viz.viser_utils :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: kimodo.viz :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/_generated/modules.rst ================================================ kimodo ====== .. toctree:: :maxdepth: 4 kimodo ================================================ FILE: docs/source/api_reference/constraints.rst ================================================ Constraints =========== Constraint definitions and utilities. .. automodule:: kimodo.constraints :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/exports.rst ================================================ Exports ======= Export utilities for common formats. .. automodule:: kimodo.exports.bvh :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.exports.mujoco :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.exports.smplx :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/index.rst ================================================ API Reference ============= This section contains the API documentation for Kimodo, organized by domain. .. toctree:: :maxdepth: 2 :caption: Core Modules model motion_rep constraints exports viz utilities post-processing ================================================ FILE: docs/source/api_reference/model.rst ================================================ Model ===== Core model architecture, diffusion logic, and text encoders. Kimodo Model ------------ .. automodule:: kimodo.model.kimodo_model :members: :undoc-members: :special-members: __call__ Denoiser and Backbone --------------------- .. automodule:: kimodo.model.twostage_denoiser :members: :undoc-members: .. 
automodule:: kimodo.model.backbone :members: :undoc-members: Classifier-Free Guidance ------------------------ .. automodule:: kimodo.model.cfg :members: :undoc-members: Model Loading ------------- .. automodule:: kimodo.model.load_model :members: :undoc-members: Text Encoder ------------ .. automodule:: kimodo.model.text_encoder_api :members: :undoc-members: :special-members: __call__ ================================================ FILE: docs/source/api_reference/motion_rep.rst ================================================ Motion Representation ===================== Motion representation utilities and kinematics helpers. Skeleton -------- .. automodule:: kimodo.skeleton :members: :undoc-members: :show-inheritance: Forward Kinematics ------------------ .. automodule:: kimodo.skeleton.kinematics :members: :undoc-members: :show-inheritance: Motion Representations ---------------------- .. automodule:: kimodo.motion_rep.reps.base :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.motion_rep.reps.kimodo_motionrep :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.motion_rep.reps.tmr_motionrep :members: :undoc-members: :show-inheritance: Utilities --------- .. automodule:: kimodo.motion_rep.feet :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.motion_rep.stats :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.motion_rep.smooth_root :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/post-processing.rst ================================================ Post-Processing Bindings ======================== .. automodule:: kimodo.postprocess :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/utilities.rst ================================================ Utilities ========= General utilities used across the codebase. .. automodule:: kimodo.tools :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.geometry :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.sanitize :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/api_reference/viz.rst ================================================ Visualization ============= Visualization helpers for rendering skeletons and meshes. .. automodule:: kimodo.viz.g1_rig :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.viz.smplx_skin :members: :undoc-members: :show-inheritance: .. automodule:: kimodo.viz.viser_utils :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/benchmark/introduction.md ================================================ # Benchmark Introduction We provide a benchmark to evaluate text-to-motion and constrained motion generation on a shared test suite. For reproducibility, all test content is stored on disk as folders and files, so anyone can run exactly the same cases. The benchmark test suite is available to download from HuggingFace at [`nvidia/Kimodo-Motion-Gen-Benchmark`](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark) and is currently set up for use with models trained on the [SOMA](https://github.com/NVlabs/SOMA-X) body skeleton. The benchmark contains text prompts, durations, and constraint configurations for a variety of test cases, but **not** the ground-truth motion data itself. 
The ground-truth motions are derived from the [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed), which has its own license that you should review. To construct the full set of benchmark motions, you must download the BONES-SEED dataset separately and run our `create_benchmark` script to populate the test suite with ground-truth motions. Constructing the benchmark with `create_benchmark` is the first step in the full [Evaluation Pipeline](pipeline.md), which is described in detail on the next page.

In addition to the benchmark test cases, we provide code to run generation with Kimodo and compute a variety of [metrics](metrics.md) measuring motion quality, text alignment, and constraint following. While this open-source public test suite is not identical to the one used in the [Kimodo tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf) (Sec. 6.1), the evaluation metrics are the same and the evaluation methodology is similar.

On this page, we describe the overall structure of the test suite and the details of the different test cases. In subsequent pages, we describe how to run the full [evaluation pipeline](pipeline.md), detail the [metrics](metrics.md), and finally provide the [results](results.md) of Kimodo-SOMA-RP and Kimodo-SOMA-SEED on the benchmark.

## Dataset Splits

To evaluate a model on the benchmark, it should be trained with the [provided splits](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark/tree/main/splits) for the [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed). The different splits are defined in:

- `train_split_paths.txt` - filenames of training data
- `test_content_split_paths.txt` - filenames for the test split containing new semantic "content". This split contains motions with a `content_name` (from the BONES-SEED metadata) that is not seen in the training split. This tests model generalization to new semantic motion types, e.g., for text-to-motion generalization.
- `test_repetition_split_paths.txt` - filenames for the test split containing new motions from content that was seen in training. This split contains motions whose `content_name` appears in the training split, but where the exact motion itself was not seen. This tests a model's ability to generalize to novel performances of a familiar motion type, e.g., for constraint-following generalization.

The training split should be used for training, while the two test splits (`content` and `repetition`) are used in the test suite, as described below. Note that the test cases in the benchmark do not cover the entire content and repetition test splits; instead, we strategically sample a subset that maximizes content diversity.

## Test Suite Structure

The full test suite contains 22,474 test cases spanning text- and constraint-conditioned motion generation. The suite is organized hierarchically to logically group test cases, so the evaluation pipeline can be run on a subset of the benchmark rather than the full suite, if desired.
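Because of this organization, it is easy to check what a subtree contains before running the (potentially long) pipeline on it. Below is a minimal Python sketch, assuming a local copy of the test suite at `testsuite/`; it relies on the fact that every test case folder contains a `meta.json`, as described next:

```python
from collections import Counter
from pathlib import Path

root = Path("testsuite")  # local copy of the test suite (path is an assumption)

# Every test case folder contains a meta.json (described below), so counting
# meta.json files counts test cases, grouped here by split/category/type.
counts = Counter(
    "/".join(p.parent.relative_to(root).parts[:3])
    for p in root.rglob("meta.json")
)
for group, n in sorted(counts.items()):
    print(f"{group}: {n} test cases")
```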
After the benchmark has been constructed and motions generated for the model to evaluate, a **test case** is a single folder containing:

- `meta.json` (**required**): text prompt(s) and duration(s),
- `constraints.json` (**optional**): constraints for controlled generation, using the [constraints format](../user_guide/constraints.md),
- `gt_motion.npz` (**optional**): ground-truth/reference motion, using the [NPZ output format](../user_guide/output_formats.md),
- `motion.npz` (**optional**): output of the model given the `meta.json` prompt/duration and optional `constraints.json`, using the same [NPZ output format](../user_guide/output_formats.md).

In addition to being used in the evaluation pipeline, each test case can be:

- loaded in the interactive demo through **Load Example** for visualization,
- loaded in `kimodo_gen` with `--input_folder` for generation from folder-defined inputs.

### Benchmark Folder Hierarchy

The full suite is organized as follows:

```text
testsuite
├── content
│   ├── constraints_notext
│   │   ├── end-effectors
│   │   ├── fullbody
│   │   ├── mixture
│   │   └── root
│   ├── constraints_withtext
│   │   ├── end-effectors
│   │   ├── fullbody
│   │   ├── mixture
│   │   └── root
│   └── text2motion
│       ├── overview
│       ├── timeline_multi
│       └── timeline_single
└── repetition
    ├── constraints_notext
    │   ├── end-effectors
    │   ├── fullbody
    │   ├── mixture
    │   └── root
    ├── constraints_withtext
    │   ├── end-effectors
    │   ├── fullbody
    │   ├── mixture
    │   └── root
    └── text2motion
        ├── overview
        ├── timeline_multi
        └── timeline_single
```

At the highest level, the test suite is organized by the test split used. As discussed previously, `content` refers to the test split with held-out semantic categories of motion, while `repetition` refers to held-out motions from semantic categories seen during training. Within each test split, test cases are organized into:

* `text2motion`: test cases with only text prompts as input (no constraints)
* `constraints_notext`: test cases with only constraints as input (no text prompt)
* `constraints_withtext`: test cases with both prompt and constraints as input

### Text2Motion Test Cases

These test cases are pure text-to-motion with no constraints as input. `text2motion` test cases exclusively use prompts derived from our [SEED timeline annotations](https://huggingface.co/datasets/nvidia/SEED-Timeline-Annotations). This category contains three types of test cases:

* `overview`: medium-detail prompt that describes a full motion. Corresponds to `overview_description` in the [NVIDIA SEED timelines](https://huggingface.co/datasets/nvidia/SEED-Timeline-Annotations) or equivalently `content_natural_desc_4` in the [BONES SEED](https://huggingface.co/datasets/bones-studio/seed) metadata.
* `timeline_single`: fine-grained prompt describing a single segment of a timeline annotation. Corresponds to a single event in a SEED timeline.
* `timeline_multi`: fine-grained prompt describing multiple subsequent segments of a timeline annotation. Corresponds to multiple contiguous events in a SEED timeline, which have been combined into a single natural text description using an LLM.

### Constrained Test Cases

Constrained test cases provide a constraint input either without a text prompt (i.e., `constraints_notext`) or with an `overview` text prompt (i.e., `constraints_withtext`).
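Since a test case is just a folder of standard files, it can be inspected directly. Below is a minimal sketch of loading one (the folder path is hypothetical; any leaf test case works the same way, and `numpy` is the only dependency):

```python
import json
from pathlib import Path

import numpy as np

# Hypothetical leaf folder; any indexed test case in the suite has the same layout.
case = Path("testsuite/content/constraints_withtext/end-effectors/feet_posrot/0000")

meta = json.loads((case / "meta.json").read_text())  # required: prompt(s), duration(s)

constraints_path = case / "constraints.json"         # optional
constraints = json.loads(constraints_path.read_text()) if constraints_path.exists() else None

motion_path = case / "motion.npz"                    # optional: generated motion
if motion_path.exists():
    motion = np.load(motion_path)
    print(list(motion.keys()))                       # e.g., posed_joints, foot_contacts
```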
The different types of constraint categories mirror the [constraint types supported by Kimodo](../key_concepts/constraints.md) and include:

* `fullbody`: constrains all joint positions in the skeleton at specific frames
* `end-effectors`: constrains the positions and rotations of hand and/or foot joints at specific frames
* `root`: constrains the 2D root position and optionally heading on a path or at specific frames
* `mixture`: evaluates compositional control when multiple constraint families are combined

Within each constraint type in the hierarchy are multiple subtypes that vary the constraint sparsity patterns (either in time or in space). So the hierarchy of a constraint folder is:

```text
constraints_XX
├── end-effectors
│   ├── feet_posrot        # feet only constraints
│   ├── hands_feet_posrot  # hands + feet constraints
│   └── hands_posrot       # hands only constraints
├── fullbody
│   ├── inbetweening       # constraints at start and end only
│   └── random             # constraints at random frames
├── mixture
│   ├── root_ee_hands_feet_posrot_fullbody  # mix of (1) root trajectory, (2) hand + foot, and (3) full-body
│   ├── root_ee_hands_posrot                # mix of (1) root keyframe, and (2) hands
│   ├── root_ee_hands_posrot_fullbody       # mix of (1) root keyframe, (2) hands, and (3) full-body
│   └── root_path_fullbody                  # mix of (1) root trajectory, and (2) full-body
└── root
    ├── path_2dpos         # root trajectory position
    ├── path_2dposrot      # root trajectory position + heading
    ├── waypoint_2dpos     # root waypoint position
    └── waypoint_2dposrot  # root waypoint position + heading
```

### Indexed Test Cases in Leaf Folders

Each leaf folder contains indexed test cases (`0000`, `0001`, `0002`, ...). For example:

```text
end-effectors/feet_posrot/
├── 0000/
├── 0001/
├── 0002/
...
└── 0255/
```

Each index folder is one standalone test case with its own `meta.json`, optional `constraints.json`, optional `gt_motion.npz`, and optional `motion.npz`.

================================================
FILE: docs/source/benchmark/metrics.md
================================================

# Metrics

The benchmark evaluates generated motion along three axes:

- **Motion quality** -- foot-skate and contact-consistency metrics,
- **Constraint following** -- position error for root, end-effector, and full-body constraints,
- **Text alignment** -- TMR retrieval and distributional metrics.

Metrics are implemented in `kimodo/metrics/` and orchestrated by `benchmark/evaluate_folder.py`. The protocol is aligned with the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf) (Sec. 6.1, "Evaluation Metrics").

## Evaluation Protocol

The evaluation pipeline runs two passes over each group of test cases:

1. **Generated pass** -- evaluates `motion.npz` with all metrics (foot skate, contact consistency, constraint following) and, when TMR embeddings are available, computes retrieval and FID scores.
2. **Ground-truth pass** -- evaluates `gt_motion.npz` with the same motion-quality and constraint metrics. TMR retrieval metrics are not recomputed in this pass.

Running both passes enables side-by-side comparison: the GT row serves as an empirical upper bound for motion quality, and deviations between GT and generated metrics highlight where the model can improve. See [Evaluation pipeline](pipeline.md) for the full workflow.

## Metrics Reference

The table below lists every key written to `metrics.json`. Detailed descriptions follow in subsequent sections.
| Key | Category | Unit | Direction | | --- | --- | --- | --- | | `foot_skate_from_height` | Motion quality | m/s | Lower is better | | `foot_skate_from_pred_contacts` | Motion quality | m/s | Lower is better | | `foot_skate_max_vel` | Motion quality | m/s | Lower is better | | `foot_skate_ratio` | Motion quality | ratio (0--1) | Lower is better | | `foot_contact_consistency` | Motion quality | ratio (0--1) | Higher is better | | `constraint_root2d_err` | Constraint follow | m | Lower is better | | `constraint_root2d_err_p95` | Constraint follow | m | Lower is better | | `constraint_root2d_acc` | Constraint follow | ratio (0--1) | Higher is better | | `constraint_fullbody_keyframe` | Constraint follow | m | Lower is better | | `constraint_end_effector` | Constraint follow | m | Lower is better | | `TMR/t2m_sim` | Text alignment | score (0--1) | Higher is better | | `TMR/t2m_R/R01` ... `R10` | Text alignment | % | Higher is better | | `TMR/t2m_R/MedR` | Text alignment | rank | Lower is better | | `TMR/FID/gen_text` | Text alignment | distance | Lower is better | | `TMR/FID/gen_gt` | Text alignment | distance | Lower is better | | `TMR/FID/gt_text` | Text alignment | distance | Lower is better | | `TMR/m2m_sim` | Text alignment | score (0--1) | Higher is better | | `TMR/t2m_gt_sim` | Text alignment | score (0--1) | Higher is better | | `TMR/m2m_R/R01` ... `R10` | Text alignment | % | Higher is better | | `TMR/t2m_gt_R/R01` ... `R10` | Text alignment | % | Higher is better | :::{note} Raw metric values are stored in SI units (meters for positions, m/s for velocities). The summary tables printed by `benchmark/parse_folder.py` convert constraint position errors to **cm** and foot-skate velocities to **cm/s** for readability. ::: ### Foot Skating Metrics Foot skating measures how much a foot slides along the ground when it should be in static contact with the ground. Four complementary metrics capture different aspects of this artifact. - **`foot_skate_from_height`** (m/s, lower is better): Mean velocity of the **toe joints** (left toe, right toe) on frames where the toe height is below a floor threshold (`height_thresh = 0.05 m`). This metric does not rely on predicted contact labels -- it uses a geometric criterion (Y-coordinate < threshold) to identify ground-contact frames. - **`foot_skate_from_pred_contacts`** (m/s, lower is better): Mean velocity of all **four foot joints** (left/right heel and toe) on frames where the model predicts contact via the `foot_contacts` output. Unlike `foot_skate_from_height`, this metric trusts the model's own contact predictions and measures all four foot joints rather than toes only. - **`foot_skate_max_vel`** (m/s, lower is better): Maximum velocity across all four foot joints and all time steps where predicted contact is active. This captures worst-case slip spikes that mean-based metrics can hide. - **`foot_skate_ratio`** (ratio 0--1, lower is better): Fraction of ground-contact frames where toe velocity exceeds a threshold (`vel_thresh = 0.2 m/s`). A frame counts as ground contact when the toe is below `height_thresh = 0.05 m` on both the current and the next frame. Inspired by the [GMD](https://github.com/korrawe/guided-motion-diffusion) skating metric. ### Contact Consistency Metric - **`foot_contact_consistency`** (ratio 0--1, higher is better): Agreement between the model's predicted foot contacts and a heuristic contact detector based on joint height and velocity (`vel_thresh = 0.15 m/s`, `height_thresh = 0.10 m`). 
Computed as accuracy (`1 - incorrect_ratio`) over all time steps and four contact channels. A score of 1.0 means perfect agreement between predicted and heuristic contacts. As noted in the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf), this metric provides important context for interpreting the contact-based foot-skate metrics above: if contact consistency is low, `foot_skate_from_pred_contacts` may be unreliable. ### Constraint-Following Metrics Constraint metrics are computed only when the test case includes a `constraints.json` file. The `ContraintFollow` metric class dispatches by [constraint type](../key_concepts/constraints.md): - **`constraint_end_effector`** (m, lower is better): Mean Euclidean distance between target end-effector positions and generated joint positions at the constrained frames. Only position-constrained joints are evaluated (rotation targets are not measured by this metric). - **`constraint_fullbody_keyframe`** (m, lower is better): Mean per-joint Euclidean distance between target and generated full-body joint positions at keyframes. The error is averaged over all joints and all keyframe frames. - **`constraint_root2d_err`** (m, lower is better): Mean 2D Euclidean distance (in the XZ ground plane) between target and generated root positions at constrained frames. - **`constraint_root2d_err_p95`** (m, lower is better): 95th percentile of the per-frame root 2D error across all samples in a group. Computed during aggregation by `evaluate_folder.py` to capture tail-end failures that the mean can mask. - **`constraint_root2d_acc`** (ratio 0--1, higher is better): Fraction of constrained root frames where the 2D position error is within a distance threshold (`root_threshold = 0.10 m`). ### TMR-Based Metrics Text alignment is evaluated using [TMR](https://mathis.petrovich.fr/tmr/) (Text-to-Motion Retrieval), a separate encoder model that maps both text and motion into a shared embedding space. TMR is not used for generation -- it is loaded only for evaluation (see `kimodo/model/tmr.py`). We release a version of TMR retrained on the full Rigplay dataset as [`TMR-SOMA-RP-v1`](https://huggingface.co/nvidia/TMR-SOMA-RP-v1). The original TMR was trained on HumanML3D; our retrained variant uses the same architecture but is trained on the Rigplay motion dataset, SOMA skeleton, and with [LLM2Vec](https://github.com/McGill-NLP/llm2vec) text embeddings. #### Similarity Scores TMR encodes each text prompt and each motion clip into a unit-length embedding vector. Cosine similarity between text and motion embeddings is rescaled to a [0, 1] range: ``` score = cosine_similarity / 2 + 0.5 ``` Three per-test-case similarity scores are recorded: - **`TMR/t2m_sim`** (0--1, higher is better): similarity between the text prompt and the generated motion. - **`TMR/m2m_sim`** (0--1, higher is better): similarity between the generated and ground-truth motions (only when GT is available). - **`TMR/t2m_gt_sim`** (0--1, higher is better): similarity between the text prompt and the GT motion (only when GT is available). #### R-precision (Retrieval Accuracy) R-precision measures whether the correct motion can be retrieved from a pool given its corresponding text query. For each text query in the evaluation group, all motions are ranked by TMR similarity. R@k is the percentage of queries where the correct motion appears in the top k results. 
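For intuition, the sketch below computes R@k and the median rank from N paired, unit-length text and motion embeddings. It ignores the near-duplicate handling described in the note that follows, so it is an illustration rather than the benchmark implementation:

```python
import numpy as np

def retrieval_metrics(text_emb: np.ndarray, motion_emb: np.ndarray) -> dict:
    """R@k and median rank for N paired, unit-length embeddings of shape (N, D)."""
    sim = text_emb @ motion_emb.T                       # (N, N) cosine similarities
    order = np.argsort(-sim, axis=1)                    # motions ranked per text query
    # Rank (1-based) at which each query retrieves its own motion.
    ranks = 1 + np.argmax(order == np.arange(len(sim))[:, None], axis=1)
    metrics = {f"R{k:02d}": 100.0 * float(np.mean(ranks <= k)) for k in (1, 2, 3, 5, 10)}
    metrics["MedR"] = float(np.median(ranks))
    return metrics
```

Note that the `score = cosine_similarity / 2 + 0.5` rescaling above is monotonic, so it has no effect on retrieval ranks.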
Reported keys: `TMR/t2m_R/R01`, `R02`, `R03`, `R05`, `R10` (%), and `TMR/t2m_R/MedR` (median rank, lower is better) correspond to retrieval accuracy when using generated motions. When ground-truth motions are available, analogous retrieval metrics are computed for motion-to-GT-motion (`TMR/m2m_R/...`) and text-to-GT-motion (`TMR/t2m_gt_R/...`). :::{note} Near-duplicate text prompts can artificially penalize retrieval ranking. The evaluation handles this by grouping prompts whose text-text similarity exceeds a threshold of 0.99 and treating any motion in that group as a valid match. ::: #### FID (Frechet Inception Distance) FID measures distributional distance between two sets of TMR embeddings by fitting a multivariate Gaussian to each set and computing the Frechet distance. Three FID variants are reported: - **`TMR/FID/gen_gt`**: distance between generated-motion and GT-motion embeddings (only when GT is available). This is the FID metric that is typically reported in the motion generation literature. - **`TMR/FID/gen_text`**: distance between generated-motion embeddings and text embeddings. - **`TMR/FID/gt_text`**: distance between GT-motion and text embeddings (only when GT is available). Lower values indicate that the two distributions are more similar. FID requires at least 2 samples; groups with fewer samples report `NaN`. #### Per-Test-Case Retrieval In addition to the aggregate metrics above, each test case's `metrics.json` includes a `tmr` block with single motion retrieval results: - `t2m_rank`: the rank of the correct motion when retrieving with this test case's text query. - `top5_retrieved`: the top-5 retrieved motions (sample IDs and text prompts) for inspection. ## JSON Output Format Below is a representative `metrics.json` written by `evaluate_folder.py` for a single test case with mixed constraints (root + end-effector + full-body) and TMR embeddings: ```json { "num_motions": 1, "folder": "...", "per_motion_mean_gen": { "foot_skate_from_height": 0.3144, "foot_skate_from_pred_contacts": 0.0672, "foot_skate_max_vel": 0.2109, "foot_contact_consistency": 0.9522, "foot_skate_ratio": 0.2182, "constraint_end_effector": 0.0286, "constraint_root2d_err": 0.0534, "constraint_root2d_acc": 1.0, "constraint_fullbody_keyframe": 0.0324, "TMR/t2m_sim": 0.8209 }, "per_motion_mean_gt": { "foot_skate_from_height": 0.2361, "foot_skate_from_pred_contacts": 0.0269, "foot_skate_max_vel": 0.1459, "foot_contact_consistency": 1.0, "foot_skate_ratio": 0.1402, "constraint_end_effector": 9.82e-07, "constraint_root2d_err": 0.0407, "constraint_root2d_acc": 1.0, "constraint_fullbody_keyframe": 8.73e-07 }, "tmr": { "t2m_rank": 2, "text": "A person is swiftly performing a dance move by moving their hands and legs.", "top5_retrieved": [ { "id": "0231", "text": "A person is performing dance steps while stepping back and forward..." }, { "id": "0029", "text": "A person is swiftly performing a dance move by moving their hands and legs." 
} ] } }
```

Group-level aggregate JSONs (`.json`) have the same structure but with `num_motions > 1`, averaged per-motion metrics, additional keys like `constraint_root2d_err_p95`, and a `tmr` block containing the aggregate retrieval and FID scores:

```json
{
  "num_motions": 256,
  "folder": "...",
  "per_motion_mean_gen": {
    "foot_skate_from_height": 0.1742,
    "foot_skate_from_pred_contacts": 0.0611,
    "foot_skate_max_vel": 0.3747,
    "foot_contact_consistency": 0.9483,
    "foot_skate_ratio": 0.1499,
    "constraint_end_effector": 0.0367,
    "constraint_root2d_err": 0.0495,
    "constraint_root2d_acc": 0.9212,
    "constraint_fullbody_keyframe": 0.0324,
    "constraint_root2d_err_p95": 0.1115
  },
  "per_motion_mean_gt": {
    "foot_skate_from_height": 0.1617,
    "foot_skate_from_pred_contacts": 0.0235,
    "foot_skate_max_vel": 0.1185,
    "foot_contact_consistency": 1.0,
    "foot_skate_ratio": 0.1214,
    "constraint_end_effector": 1.48e-06,
    "constraint_root2d_err": 0.0376,
    "constraint_root2d_acc": 1.0,
    "constraint_fullbody_keyframe": 1.16e-06,
    "constraint_root2d_err_p95": 0.0602
  },
  "tmr": {
    "TMR/t2m_sim": 0.8742,
    "TMR/t2m_R/R01": 75.39,
    "TMR/t2m_R/R02": 85.55,
    "TMR/t2m_R/R03": 88.28,
    "TMR/t2m_R/R05": 90.23,
    "TMR/t2m_R/R10": 93.36,
    "TMR/t2m_R/MedR": 1.0,
    "TMR/t2m_R/len": 256.0,
    "TMR/FID/gen_text": 0.1442,
    "TMR/m2m_R/R01": 94.53,
    "TMR/m2m_R/R02": 97.66,
    "TMR/m2m_R/R03": 98.05,
    "TMR/m2m_R/R05": 98.83,
    "TMR/m2m_R/R10": 99.22,
    "TMR/m2m_R/MedR": 1.0,
    "TMR/m2m_R/len": 256.0,
    "TMR/t2m_gt_R/R01": 80.47,
    "TMR/t2m_gt_R/R02": 88.28,
    "TMR/t2m_gt_R/R03": 91.02,
    "TMR/t2m_gt_R/R05": 92.58,
    "TMR/t2m_gt_R/R10": 94.53,
    "TMR/t2m_gt_R/MedR": 1.0,
    "TMR/t2m_gt_R/len": 256.0,
    "TMR/FID/gen_gt": 0.0387,
    "TMR/FID/gt_text": 0.1349
  }
}
```

================================================
FILE: docs/source/benchmark/pipeline.md
================================================

# Evaluation Pipeline

This page describes the full benchmark workflow, which uses scripts in the `benchmark` directory:

1. Build the full test suite using ground-truth motions from BONES-SEED BVH data and benchmark metadata (`create_benchmark.py`),
2. Generate motions with a model for all or part of the test suite (`generate_eval.py`),
3. Compute text/motion embeddings with a pre-trained TMR model (`embed_folder.py`),
4. Evaluate metrics over all generated samples (`evaluate_folder.py`),
5. Aggregate and summarize results (`parse_folder.py`).

This pipeline works off-the-shelf for Kimodo models. To evaluate your own model, step (2) will need to be modified to generate with your custom model and output in the expected NPZ format.

## Prerequisite: Download Motion Data and Metadata

The benchmark is constructed from motions in the BONES-SEED dataset and our released metadata. Make sure you have downloaded the [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed) along with the metadata for the test suite from HuggingFace at [`nvidia/Kimodo-Motion-Gen-Benchmark`](https://huggingface.co/datasets/nvidia/Kimodo-Motion-Gen-Benchmark).

The `testsuite` folder from the downloaded metadata contains the directory structure described in the [benchmark introduction](introduction.md) with `meta.json`, `seed_motion.json`, and `seed_constraints.json` metadata files in the leaf folders. These metadata files contain the text prompts, durations, and constraint definitions for each test case.
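If you prefer to script the download, the metadata repository can be fetched with `huggingface_hub` (a sketch; `create_benchmark.py` in step 1 below can also download the test suite structure automatically if it is not already present locally):

```python
from huggingface_hub import snapshot_download

# Fetches the benchmark metadata (test suite structure and per-case metadata);
# the BONES-SEED motion data must be downloaded separately under its own license.
local_dir = snapshot_download(
    repo_id="nvidia/Kimodo-Motion-Gen-Benchmark",
    repo_type="dataset",
)
print(local_dir)  # path to the downloaded snapshot in the local HF cache
```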
The first two steps of the evaluation pipeline will create the following in the leaf folders to prepare for computing metrics:

- **Ground-Truth Motion** (`gt_motion.npz`): produced by `create_benchmark.py` from SEED BVH + metadata.
- **Constraints Configuration** (`constraints.json`): for test cases with constraint inputs, this file is created by `create_benchmark.py` from SEED BVH + metadata.
- **Generated Motion** (`motion.npz`): produced by the generation step from the model to evaluate (e.g., `generate_eval.py`).

To perform the full evaluation, including metrics for both ground-truth and generated motions (steps 3--5), each leaf folder must contain both `gt_motion.npz` and `motion.npz`.

> Note: all of the following steps will work with a _subset_ of the full test suite, if desired. Anywhere the `testsuite` directory is passed in, it can be replaced with a specific subset such as `testsuite/content/text2motion` to run only this subset of the benchmark.

## 1. Build Full Benchmark (`create_benchmark.py`)

The `create_benchmark.py` script bridges the ground-truth motions and the metadata: it downloads the testsuite structure (if not already present locally), then reads the referenced BVH files from a local copy of BONES-SEED and writes `gt_motion.npz` and `constraints.json` into each sample folder.

```bash
python benchmark/create_benchmark.py path/to/testsuite --dataset datasets/bones-seed/soma_uniform
```

With the default single worker, this construction can take several hours, and the resulting folder is about **26 GB**. To run faster, you can increase the number of parallel workers for processing:

```bash
OMP_NUM_THREADS=2 python benchmark/create_benchmark.py path/to/testsuite --dataset datasets/bones-seed/soma_uniform --workers 16
```

This example runs well on a 32-core system, but you may need to adjust the number of threads per worker and the total number of workers for your system. Generally, a lower number of threads per worker combined with a larger number of workers (up to your available CPU capacity) runs fastest.

Options:

- `--dataset`: path to the local SEED dataset folder (default: `datasets/bones-seed/soma_uniform`).
- `--workers`: number of parallel workers to use for benchmark construction (default: 1, i.e., sequential).
- `--overwrite`: rebuild `gt_motion.npz` even if it already exists.

For each test case, the script:

1. parses the BVH file into local rotation matrices and root translation,
2. subsamples to 30 FPS,
3. converts to the standard T-pose via `SOMASkeleton77.to_standard_tpose`,
4. computes Kimodo motion features and canonicalizes the motion,
5. writes the resulting motion dictionary as `gt_motion.npz`.

For a detailed walkthrough of steps 1--4, see [Loading BONES-SEED BVH data](../user_guide/seed_dataset.md).

## 2. Generate Motions (`generate_eval.py`)

The next step is to generate a motion for each test case. The script `benchmark/generate_eval.py` recursively generates one motion with Kimodo per test case from either the full `testsuite` or a desired subset.

```bash
python benchmark/generate_eval.py \
    --benchmark path/to/testsuite \
    --output generated_folder \
    --model kimodo-soma-rp \
    --batch_size 32 \
    --num_workers 4
```

The batch size and number of data workers should be adjusted for your system. The script is intended to be run with the latest Kimodo-SOMA models (currently v1.1), which are compatible with the benchmark.

> Note: each test case has a seed in `meta.json` that is loaded and used for generation to enable reproducibility.
However, by default, the generation script uses the first seed in a batch to seed the whole batch, so to make results completely repeatable, you must set the batch size to 1 or always use the same batch size when running generation. Useful options: - `--model`: Kimodo model to use for generation. See [available models](../getting_started/quick_start.md#overview-kimodo-models) for the full list. - `--output`: output root directory. The testsuite hierarchy is mirrored here. If omitted, motions are generated **in-place** inside the testsuite folder. - `--overwrite`: regenerate even if `motion.npz` already exists. - `--diffusion_steps`: default denoising steps (can be overridden by each sample `meta.json`). - `--postprocess`: enable post-processing. For fair evaluation, it is recommended to **not** use post-processing so that metrics reflect the raw model output. - `--text_encoder_fp32`: will instantiate the text encoder (if needed) with float32 precision instead of bfloat16. The Kimodo v1.1 models are trained with float32 text encodings, so this slightly improves accuracy but requires extra VRAM. After generation, the output tree mirrors the `testsuite` hierarchy and includes generated motions (`motion.npz`). If the testsuite was built with `create_benchmark.py`, each leaf already has `gt_motion.npz`; the generation step adds `motion.npz` per sample. ```text generated_folder/ └── .../0000/ ├── meta.json ├── constraints.json # present if available in testsuite ├── gt_motion.npz # if built with create_benchmark └── motion.npz # generated ``` ### Using Custom Models The `generate_eval` script is set up to work with Kimodo models, but it can be easily adapted or replaced by generation with a custom model. The only requirement to be able to compute all metrics is to output the `motion.npz` file for each test case that minimally contains: (1) `posed_joints` field with global joint positions on the SOMA 77-joint skeleton and (2) `foot_contacts` field with binary foot contact predictions. Please see the [output formats docs](../user_guide/output_formats.md) for more details on the `NPZ` format. ## 3. Embed with Pre-Trained TMR (`embed_folder.py`) Several evaluation metrics such as R-precision, FID, and latent similarity rely on latent embeddings of both motion and text. For this purpose, we use a [Text-Motion-Retrieval (TMR)](https://mathis.petrovich.fr/tmr/) model trained on the full Bones Rigplay dataset. See [Metrics](metrics.md) for details on the TMR evaluation protocol and metrics. The next step in the eval pipeline is using this TMR model with the `benchmark/embed_folder.py` script to recursively embed each generated motion (`motion.npz`), GT motion (`gt_motion.npz`) when present, and the text prompt from `meta.json`: ```bash python benchmark/embed_folder.py generated_folder --model tmr-soma-rp ``` The default TMR model (`tmr-soma-rp`) trained on the full Rigplay dataset is released as [`TMR-SOMA-RP-v1`](https://huggingface.co/nvidia/TMR-SOMA-RP-v1). It is automatically downloaded from HuggingFace on first use of the embedding script. Options: - `--model`: TMR model to use for encoding (default: `tmr-soma-rp`). - `--device`: compute device (`cuda` or `cpu`). Defaults to `cuda` if available, otherwise `cpu`. - `--overwrite`: re-embed even if embedding files already exist. - `--text_encoder_fp32`: will instantiate the text encoder (if needed) with float32 precision instead of bfloat16. 
The TMR model is trained with float32 text encodings, so this slightly improves accuracy but requires extra VRAM. Running this script saves the embeddings to each test case folder that has the corresponding motion file(s) and `meta.json`: - `motion_embedding.npy` (when `motion.npz` exists) - `gt_motion_embedding.npy` (when `gt_motion.npz` exists) - `text_embedding.npy` > Note: this script can take over 1 hour to run for the full test suite, depending on your GPU. ## 4. Compute Evaluation Metrics (`evaluate_folder.py`) Next, use `benchmark/evaluate_folder.py` to compute per-test-case and aggregated metrics across the test suite (or a specific subset folder). Each leaf folder must contain both `motion.npz` and `gt_motion.npz` to compute the metrics. ```bash python benchmark/evaluate_folder.py generated_folder ``` Options: - `--device`: compute device (`cuda` or `cpu`). Defaults to `cuda` if available, otherwise `cpu`. The script runs two evaluation passes: one on the generated motion (`motion.npz`) and one on the ground-truth motion (`gt_motion.npz`). It outputs: - per test case results: `metrics.json` inside each test case (leaf) folder with metrics summarized for that single test case - per group results: `.json` one level above each group of test-case folders that aggregates metrics over all contained test cases Please see the [Metrics](metrics.md) page for a detailed explanation of these json formats. After embedding and evaluation, the folder structure should look like: ```text generated_folder/ ├── .../0000/ │ ├── motion.npz │ ├── gt_motion.npz │ ├── motion_embedding.npy │ ├── gt_motion_embedding.npy │ ├── text_embedding.npy │ └── metrics.json # single test-case metrics └── .../.json # folder-level aggregate summary of all contained test cases ``` ## 5. Summarize Results of Full Benchmark (`parse_folder.py`) If you have computed metrics for the _entire_ test suite (both `content` and `repetition` splits), use `benchmark/parse_folder.py` to validate all per-test-case result JSONs and aggregate metrics into summary tables. Unlike the previous steps, this script expects the user to pass in the root `testsuite` and for the test suite to follow the standard split/category hierarchy (see [Introduction](introduction.md)): - **Splits**: `content`, `repetition` - **Categories**: `overview`, `timeline_single`, `timeline_multi` (text-following), `constraints_withtext`, `constraints_notext` (constrained generation) ```bash python benchmark/parse_folder.py generated_folder ``` Options: - `--output`: path for the output JSON (default: `/summary_rows.json`). - `--format`: table output format. `terminal` (default) for fixed-width tables, `md` for markdown tables suitable for copy-pasting into documentation. The script: 1. discovers all grouped test case directories (folders containing single test cases with `meta.json`, `motion.npz`, and `gt_motion.npz`), 2. loads each group's `.json` result files written by `evaluate_folder`, 3. computes weighted averages of all metrics by split and category, 4. writes `summary_rows.json` with per-row and per-table aggregated results, 5. prints formatted benchmark tables to the terminal (text-following and constraints, with GT and method rows side by side). Metric values in the tables are converted to user-friendly units (e.g., constraint position errors in cm, foot skating in cm/s). See [Metrics](metrics.md) for definitions of individual metrics. 
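For reference, the per-split and per-category weighted averaging in step 3 boils down to weighting each group's mean by its `num_motions`. Below is a minimal sketch over group-level result dicts (field names follow the JSON format on the [Metrics](metrics.md) page; the actual script additionally handles GT rows and unit conversion):

```python
import numpy as np

def weighted_average(groups: list[dict], key: str) -> float:
    """Average a metric over groups, weighting each group's mean by its size.

    `groups` are group-level result dicts as written by evaluate_folder.py,
    e.g. {"num_motions": 256, "per_motion_mean_gen": {"foot_skate_ratio": 0.15, ...}}.
    """
    weights = np.array([g["num_motions"] for g in groups], dtype=float)
    values = np.array([g["per_motion_mean_gen"][key] for g in groups], dtype=float)
    return float(np.sum(weights * values) / np.sum(weights))
```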
================================================ FILE: docs/source/benchmark/results.md ================================================ # Kimodo Results On this page, we report the results for the latest Kimodo models on the benchmark test suite. These results are reproducible with the [evaluation pipeline](pipeline.md) and should be used when comparing against other models. Note that the reported numbers differ from the numbers in the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf) (Sec. 6) due to differences in skeleton, test suite composition, and evaluation details. To reproduce these results or evaluate your own model, follow the [evaluation pipeline](pipeline.md) and use `parse_folder --format md` to generate summary tables in markdown format. **Note on reproducibility**: to exactly reproduce the results in the tables below, use batch size 1 when generating with Kimodo (i.e., when running `generate_eval.py`). This way, every test case is individually seeded according to `meta.json`. The reported results were computed using LLM2Vec in the default `bfloat16` precision. However, the Kimodo-SOMA-v1.1 and TMR models were actually trained with `float32` embeddings, so if you want to get the best possible performance (and you have enough VRAM), you can include `--text_encoder_fp32` when running the generation and embedding steps, even though the results will not match the tables here. Results are reported on the two splits described in [the introduction](introduction.md#dataset-splits): - **Content**: test cases with novel semantic content not present in training (e.g. unseen action categories). - **Repetition**: content categories seen during training, but specific motion clips are held out and unseen. Note that due to the annotations in Bones Rigplay and SEED datasets, the text prompts in this test split have already been seen during training. For each split, we also report metrics for the ground truth motion. These rows serve as an empirical upper bound for motion quality, and deviations between ground truth and generated metrics highlight where the model can improve. We split results for each model into two tables corresponding to different test cases in the test suite: - **Text-Following**: `overview`, `timeline_single`, and `timeline_multi` - **Constrained**: `constraints_withtext`, `constraints_notext` ## Quantitative Results Results are reported for two models: - **Kimodo-SOMA-SEED-v1.1**: trained on the public [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) dataset. The results are comparable to any model trained on SEED that uses our recommended splits [described in the introduction](introduction.md#dataset-splits). - **Kimodo-SOMA-RP-v1.1**: trained on the full (proprietary) Bones Rigplay dataset which is a superset of BONES-SEED. Though the training split is larger, the model is not trained on the SEED test splits to ensure a fair comparison. 
### Text-Following Evaluation | | Overview R@3↑ | Overview FID↓ | Overview Skate↓ | Overview Contact↑ | Timeline single R@3↑ | Timeline single FID↓ | Timeline single Skate↓ | Timeline single Contact↑ | Timeline multi R@3↑ | Timeline multi FID↓ | Timeline multi Skate↓ | Timeline multi Contact↑ | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | | **Content** Ground Truth | 89.09 | 0.000 | 1.849 | 1.000 | 86.26 | 0.000 | 1.789 | 1.000 | 88.47 | 0.000 | 1.711 | 1.000 | | **Content** Kimodo-SOMA-SEED-v1.1 | 81.13 | 0.035 | 4.077 | 0.977 | 73.17 | 0.028 | 3.873 | 0.980 | 80.10 | 0.032 | 3.685 | 0.981 | | **Content** Kimodo-SOMA-RP-v1.1 | 83.32 | 0.025 | 3.641 | 0.982 | 78.08 | 0.026 | 3.523 | 0.984 | 84.79 | 0.028 | 3.278 | 0.985 | | **Repetition** Ground Truth | 93.91 | 0.000 | 2.106 | 1.000 | 90.13 | 0.000 | 2.037 | 1.000 | 94.49 | 0.000 | 1.931 | 1.000 | | **Repetition** Kimodo-SOMA-SEED-v1.1 | 90.92 | 0.004 | 4.573 | 0.972 | 80.38 | 0.007 | 4.442 | 0.976 | 92.58 | 0.006 | 4.199 | 0.974 | | **Repetition** Kimodo-SOMA-RP-v1.1 | 87.90 | 0.008 | 4.103 | 0.977 | 77.02 | 0.011 | 3.938 | 0.981 | 88.59 | 0.009 | 3.727 | 0.980 | ### Constrained Evaluation | | With text FB Pos↓ | With text EE Pos↓ | With text EE Rot↓ | With text 2D Root↓ | With text Pelvis@95% | Without text FB Pos↓ | Without text EE Pos↓ | Without text EE Rot↓ | Without text 2D Root↓ | Without text Pelvis@95% | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | | **Content** Ground Truth | 0.000 | 0.000 | - | 3.837 | 5.36 | 0.000 | 0.000 | - | 3.913 | 5.41 | | **Content** Kimodo-SOMA-SEED-v1.1 | 3.421 | 3.817 | - | 4.979 | 9.14 | 3.320 | 3.664 | - | 4.797 | 9.03 | | **Content** Kimodo-SOMA-RP-v1.1 | 2.929 | 3.029 | - | 4.581 | 7.77 | 2.935 | 2.994 | - | 4.411 | 7.37 | | **Repetition** Ground Truth | 0.000 | 0.000 | - | 3.607 | 5.44 | 0.000 | 0.000 | - | 3.567 | 5.42 | | **Repetition** Kimodo-SOMA-SEED-v1.1 | 3.187 | 3.852 | - | 4.734 | 9.19 | 3.120 | 3.510 | - | 4.264 | 7.89 | | **Repetition** Kimodo-SOMA-RP-v1.1 | 2.804 | 2.983 | - | 4.260 | 7.63 | 2.829 | 2.969 | - | 4.027 | 7.21 | ================================================ FILE: docs/source/conf.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 import os import sys # -- Path setup -------------------------------------------------------------- sys.path.insert(0, os.path.abspath("../..")) # -- Project information ----------------------------------------------------- project = "Kimodo" copyright = "2026, NVIDIA" author = "NVIDIA" version = "" release = "" # -- General configuration --------------------------------------------------- extensions = [ "sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx.ext.viewcode", "sphinx.ext.intersphinx", "sphinx.ext.autosummary", "sphinx.ext.githubpages", "sphinx_copybutton", "myst_parser", "sphinx_design", ] napoleon_google_docstring = True napoleon_numpy_docstring = False napoleon_include_init_with_doc = True napoleon_use_param = True napoleon_use_rtype = True autodoc_default_options = { "members": True, "member-order": "bysource", "special-members": "__init__", "undoc-members": True, "exclude-members": "__weakref__", "show-inheritance": False, } autodoc_typehints = "none" autosummary_generate = True # Avoid initialization issues for optional native libs os.environ.setdefault("MUJOCO_GL", "osmesa") os.environ.setdefault("PYOPENGL_PLATFORM", "osmesa") class Mock: """Mock class for imports that can't be satisfied.""" def __init__(self, *args, **kwargs): pass def __call__(self, *args, **kwargs): return Mock() def __getattr__(self, name): if name in ("__file__", "__path__"): return "/dev/null" if name == "__version__": # Some libraries (e.g. safetensors) parse torch.__version__ with # packaging.version.Version, so this must be a valid PEP 440 string. return "0.0.0" if name == "__signature__": return None if name == "__mro_entries__": return lambda bases: () return Mock() def __getitem__(self, name): return Mock() def __iter__(self): return iter([]) def __or__(self, other): return Mock() def __ror__(self, other): return Mock() mock_modules = [ "torch", "torch.nn", "torch.nn.functional", "torch.optim", "torch.distributed", "torch.cuda", "torch.utils", "torch.utils.data", "lightning", "lightning.fabric", "lightning_fabric", "pytorch_lightning", "tensordict", "pydantic", "pydantic.dataclasses", "pydantic_core", "mujoco", "isaacgym", "isaacgymenvs", "genesis", "omni", "wandb", "hydra", "omegaconf", "tqdm", "trimesh", "pyvista", "smplx", "smpl", "scipy", "scipy.spatial", "scipy.spatial.transform", "peft", "transformers", "safetensors", "safetensors.torch", "sklearn", "PIL", "cv2", "rich", "rich.progress", "skimage", "imageio", "openmesh", "gym", "easydict", "dm_control", "dm_control.mjcf", "dm_control.mujoco", "matplotlib", "matplotlib.pyplot", ] for mod in mock_modules: sys.modules[mod] = Mock() autodoc_mock_imports = mock_modules templates_path = ["_templates"] exclude_patterns = ["api_reference/_generated/**"] language = "en" source_suffix = { ".rst": "restructuredtext", ".md": "markdown", } master_doc = "index" # -- Options for HTML output ------------------------------------------------- html_theme = "nvidia_sphinx_theme" html_static_path = ["_static"] html_css_files = ["custom.css"] html_logo = "_static/logo-placeholder.svg" html_show_sourcelink = False html_theme_options = { "collapse_navigation": False, "navigation_depth": 4, } toc_object_entries_show_parents = "hide" htmlhelp_basename = "Kimododoc" # -- Options for intersphinx ------------------------------------------------- intersphinx_mapping = { "python": ("https://docs.python.org/3", None), "torch": ("https://pytorch.org/docs/stable/", None), "numpy": ("https://numpy.org/doc/stable/", None), } 
copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " copybutton_prompt_is_regexp = True # Generate heading anchors so cross-doc links like path.md#fragment resolve (local ids). myst_heading_anchors = 4 # Required so `:::{dropdown}` and other fenced directives in .md files are parsed (not shown as plain text). myst_enable_extensions = ["colon_fence"] def setup(app): app.add_css_file("custom.css") ================================================ FILE: docs/source/getting_started/installation.md ================================================ # Installation > Note: This project will download and install additional third-party open source software projects. Review the license terms of these open source projects before use. > Note: This repo was developed and primarily tested on Linux There are two ways to install Kimodo: (1) as a package, or (2) from the source code. Both require setting up a Hugging Face token to use the text encoder at generation time. ## Set Up Hugging Face Token The Kimodo text encoder relies on the **gated** `meta-llama/Meta-Llama-3-8B-Instruct` model, which requires: - Your HF account has been granted access to the [model page](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct). - You provide an HF token at runtime After receiving access to the Llama repo, please create an access token [here](https://huggingface.co/settings/tokens/new?tokenType=read). Then use it to log in on your command line: ```bash hf auth login ``` or alternatively, paste the token into the file ``~/.cache/huggingface/token``. If you don't have `hf` installed, you will first need to run `pip install --upgrade huggingface_hub`. ## Kimodo Install Option 1: Package Install The easiest way to get started is simply installing Kimodo as a package without needing to clone the codebase. This will allow you to generate motions and run the demo as a black box. We suggest creating a new Python environment for the install, for example with `venv` or conda: ```bash conda create -n kimodo python=3.10 conda activate kimodo ``` To ensure you have a version of [PyTorch](https://pytorch.org/get-started/locally/) that is compatible with your system and CUDA version, it is recommended to manually install the best version of PyTorch for you before installing Kimodo. Anything over PyTorch 2.0 is sufficient. We strongly suggest using a GPU-capable version of PyTorch to generate motions in a reasonable amount of time. Installing the base Kimodo package will allow you to generate motions from the command line: ```bash pip install git+https://github.com/nv-tlabs/kimodo.git ``` If you want to be able to run the interactive demo as well, use this command, which installs additional dependencies: ```bash pip install "kimodo[all] @ git+https://github.com/nv-tlabs/kimodo.git" ``` You should now be ready to use Kimodo. Check out the [quick start guide](quick_start.md) to see how to generate motions. If you experience issues with package or system compatibility using the above install strategy, we recommend downloading the codebase and using the Docker install detailed below. ## Kimodo Install Option 2: Source Code Install If you plan to build on Kimodo or dig into the codebase, you'll want to clone and install the repo. ### Clone Kimodo Repository ```bash git clone https://github.com/nv-tlabs/kimodo.git cd kimodo ``` ### Choose Your Installation Route Kimodo can be installed by building and running through a virtual environment (e.g., `conda`) or within a Docker container.
```{toctree} :maxdepth: 1 installation_virtual_env installation_docker installation_smpl ``` ================================================ FILE: docs/source/getting_started/installation_docker.md ================================================ # Installation With Docker > Note: the first time building and running with Docker can take several minutes; please be patient. ## Clone Modified Viser Library The interactive demo relies on [a fork of Viser](https://github.com/nv-tlabs/kimodo-viser) that implements a timeline interface and more. Clone it within the `kimodo` directory before building with Docker using: ```bash git clone https://github.com/nv-tlabs/kimodo-viser.git ``` ## Quick Install Before running Docker, make sure your Hugging Face token is available at `~/.cache/huggingface/token` on the host, for example by running `hf auth login` once outside the container (see the [Installation](installation.md) instructions). The easiest way to build and immediately run the interactive demo webapp (with the text-encoder service) in one command is: ```bash docker compose up -d --build ``` ## Step-by-Step Installation Alternatively, you can first build with: ```bash docker compose build ``` This builds the `text-encoder` and `demo` images, corresponding to the text encoding service and the interactive motion authoring webapp, respectively. Please see the [quick start guide](quick_start.md) for more information on these.
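If you only need one of the two images, Compose can also build a single service by name (the service names come from `docker-compose.yaml`):

```bash
# Build just the demo image (or: docker compose build text-encoder)
docker compose build demo
```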
:::{dropdown} Advanced Configuration of Dependencies
This repo uses: - `docker_requirements.in`: human-maintained, top-level dependencies - `docker_requirements.txt`: pinned lockfile (automatically generated) Notes: - We keep a lockfile for **reproducible Docker builds** (so a rebuild next week pulls the same deps). - The lockfile intentionally **omits `torch`/CUDA wheels** because the Docker base image (`nvcr.io/nvidia/pytorch`) already provides a tested PyTorch build (avoids slow installs and CUDA mismatches).
:::
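If you edit `docker_requirements.in` and need to refresh the lockfile, a conventional pip-tools invocation would look like the sketch below. This is an illustration only: the repo ships its own locking helper (`kimodo/scripts/lock_requirements.py`), which may apply extra filtering such as the `torch`/CUDA exclusions noted above.

```bash
# Illustrative pip-tools workflow (not necessarily the project's exact one):
pip install pip-tools
pip-compile docker_requirements.in --output-file docker_requirements.txt
```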

After building, you will need to manually start the text-encoder service before doing any motion generation: ```bash docker compose up text-encoder ``` Note: the first time you run this command, it will take a while as the Llama-based text encoder is downloaded. Finally, to start the interactive demo: ```bash docker compose up demo ``` For more information on using the Docker setup, see the [Quick Start](quick_start.md) guide next. ================================================ FILE: docs/source/getting_started/installation_smpl.md ================================================ # Using Kimodo-SMPLX Model Using the [Kimodo-SMPLX-RP-v1](https://huggingface.co/nvidia/Kimodo-SMPLX-RP-v1) model requires a few extra installation steps. ## Request Model Access The SMPL-X version of Kimodo is gated, so before trying to generate motions with it in the CLI or demo, go to the [Hugging Face model page](https://huggingface.co/nvidia/Kimodo-SMPLX-RP-v1) and request access. As described in the [installation](./installation.md) process, make sure your HF token is properly set up so your access to the model can be authenticated. ## Download SMPL-X Body Model If you want to visualize generated SMPL-X motions in the demo, you will need to download the SMPL-X body model. Go to the [SMPL-X](https://smpl-x.is.tue.mpg.de/) webpage, sign in or create an account, and go to the "Download" page. Click "Download SMPL-X with removed head bun (NPZ)" and then copy the `SMPLX_NEUTRAL.npz` file into the Kimodo codebase at `kimodo/kimodo/assets/skeletons/smplx22/SMPLX_NEUTRAL.npz`. Note that if you installed Kimodo as a package without downloading the codebase, you'll need to find where the assets directory is located by running: ```bash python -c "from kimodo.assets import skeleton_asset_path; print(skeleton_asset_path('smplx22'))" ``` ================================================ FILE: docs/source/getting_started/installation_virtual_env.md ================================================ # Installation With Virtual Environment > Note: the repo was tested with Python 3.10+ and PyTorch 2.0+. ## Create Environment We recommend setting up a separate virtual environment for Kimodo to avoid dependency conflicts. ### Using venv ```bash python -m venv venv source venv/bin/activate ``` ### Using Conda ```bash conda create -n kimodo python=3.10 conda activate kimodo ``` ## Install Dependencies ### Install PyTorch First, make sure to install a version of [PyTorch](https://pytorch.org/get-started/locally/) that works with your system and CUDA version. We suggest anything over PyTorch 2.0. We strongly suggest using a GPU-capable version of PyTorch to generate motions in a reasonable amount of time. ### (Optional) Clone Modified Viser Library The interactive demo relies on [a fork of Viser](https://github.com/nv-tlabs/kimodo-viser) that implements a timeline interface and more. If you want to have an editable install of this version of Viser (i.e., you expect to modify it), clone and install it within the `kimodo` directory using: ```bash git clone https://github.com/nv-tlabs/kimodo-viser.git pip install -e kimodo-viser ``` ### Install Kimodo Next, to install Kimodo, run this command from the base of the repo: ```bash pip install -e . ``` This results in a single editable install for Kimodo and the MotionCorrection package.
If you plan to use the demo, you can instead run: ```bash pip install -e ".[all]" ``` This will install our [Viser fork](https://github.com/nv-tlabs/kimodo-viser) (if not already installed in the previous step) and the [SOMA body model](https://github.com/NVlabs/SOMA-X). Next, head over to the [Quick Start](quick_start.md) page to test out your installation by generating some motions. ================================================ FILE: docs/source/getting_started/quick_start.md ================================================ # Quick Start This page provides a quick introduction to motion generation with Kimodo. For detailed explanations, we recommend reviewing the full documentation pages linked in each section. Before running these commands, follow the [installation guide](installation.md) to install Kimodo in a virtual environment or using Docker. ## Overview: Kimodo Models Motion generation can be performed with several trained Kimodo models that vary by skeleton and training dataset. > Note: models will be downloaded automatically when attempting to generate from the CLI or Interactive Demo, so there is no need to download them manually | Model | Skeleton | Training Data | Release Date | Hugging Face | License | |-------|------|------|-------------|-------------|----| | **Kimodo-SOMA-RP-v1.1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | April 10, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-RP-v1.1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) | | **Kimodo-SOMA-SEED-v1.1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) | April 10, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-SEED-v1.1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) | | **Kimodo-SOMA-RP-v1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-RP-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) | | **Kimodo-G1-RP-v1** | [Unitree G1](https://github.com/unitreerobotics/unitree_mujoco/tree/main/unitree_robots/g1) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-G1-RP-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) | | **Kimodo-SOMA-SEED-v1** | [SOMA](https://github.com/NVlabs/SOMA-X) | [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SOMA-SEED-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) | | **Kimodo-G1-SEED-v1** | [Unitree G1](https://github.com/unitreerobotics/unitree_mujoco/tree/main/unitree_robots/g1) | [BONES-SEED](https://huggingface.co/datasets/bones-studio/seed) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-G1-SEED-v1) | [NVIDIA Open Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) | | **Kimodo-SMPLX-RP-v1** | [SMPL-X](https://github.com/vchoutas/smplx) | [Bones Rigplay 1](https://bones.studio/datasets#rp01) | March 16, 2026 | [Link](https://huggingface.co/nvidia/Kimodo-SMPLX-RP-v1) | [NVIDIA R&D 
Model](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-internal-scientific-research-and-development-model-license/) | By default, we recommend using the models trained on the full Bones Rigplay dataset (700 hours of mocap) for your motion generation needs. The models trained on BONES-SEED use 288 hours of [publicly available mocap data](https://huggingface.co/datasets/bones-studio/seed) and so are less capable, but are useful for comparing your own trained models on the same dataset. See the [benchmark](../benchmark/introduction.md) for a standardized evaluation suite on BONES-SEED. ### Recommended Hardware Kimodo requires ~17GB of VRAM to generate locally entirely on GPU, due primarily to the size of the text embedding model. If you have a smaller card, set `TEXT_ENCODER_DEVICE=cpu` when running Kimodo commands to force text encoding to the CPU. This is slightly slower but reduces VRAM usage to <3 GB. The model has been most extensively tested on GeForce RTX 3090, GeForce RTX 4090, and NVIDIA A100 GPUs, but it should work on other recent cards with sufficient VRAM. ## Run Text-Encoder Service Motion generation relies on embedding the input text prompt, which becomes the input to Kimodo. Although it is fine to run the CLI commands and demo on their own, it is often preferable to start the _text encoder service_ in the background, which can be shared across all motion generation requests. This is much more efficient when making many consecutive CLI calls, as it avoids needing to instantiate the large text encoder every time. To start the text encoder service: ```bash kimodo_textencoder ``` The first run of the service will take a while as it downloads the embedding model. We recommend running this in the background or in a separate terminal where it will stay open and usable by other scripts. If you are using the Docker setup, the service can alternatively be started in the container with: ```bash docker compose up text-encoder ``` > Note: when the text encoder is initialized, the transformers library will report several unexpected and missing layers for LLM2Vec. These are expected and can be safely ignored. If you are running on a GPU with <16 GB VRAM, you can force the text encoder to the CPU, for example: ```bash TEXT_ENCODER_DEVICE=cpu kimodo_textencoder ``` ## Command-Line Text-to-Motion Generation **[CLI Documentation](../user_guide/cli.md)** You can generate motions from the command line using the generate script: ```bash kimodo_gen "A person walks forward." \ --model Kimodo-SOMA-RP-v1 \ --duration 5.0 \ --output output ``` The `--model` argument corresponds to the model name in the table above. The output motion will be saved using the stem name given by `--output` in the Kimodo [output format](../user_guide/output_formats.md). For a detailed description of all generation arguments, including how to generate motion with constraints, see the full [CLI documentation](../user_guide/cli.md). If you set up Kimodo with Docker, you can instead run generation inside the Docker container, replacing `kimodo_gen XXX` with `docker compose run --rm demo kimodo_gen XXX`. If you will be running generation multiple times, it is better to start the `demo` container (e.g., in another terminal or in the background), and then run commands inside it with `docker compose exec demo kimodo_gen XXX`.
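If you are VRAM-constrained and have not started the text-encoder service, the same environment variable shown above also applies to one-off generation commands (the prompt and output name here are just placeholders):

```bash
# Force text encoding onto the CPU for a single generation call
TEXT_ENCODER_DEVICE=cpu kimodo_gen "A person waves." --duration 3.0 --output wave
```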
## Interactive Motion Authoring Demo **[Demo Documentation](../interactive_demo/index.md)** The demo makes it easy to generate motions through an intuitive control interface for text prompting and constraints. The demo can be started with: ```bash kimodo_demo ``` The demo is a webapp that will run on [http://localhost:7860](http://localhost:7860). Open this URL in your browser to access the interface. If you are using Docker, the demo can be launched with: ```bash docker compose up demo ``` or if you want to start the demo and the text encoder service (explained above) at the same time, use: ```bash docker compose up ```
:::{dropdown} Additional Tips for Docker
You may find the following commands useful if running Kimodo within the Docker containers. In the example commands below, you can also replace `demo` by `text-encoder`: **Check logs:** ```bash docker compose logs demo ``` **Stop service:** ```bash docker compose stop demo ``` **Restart service:** ```bash docker compose restart demo ``` **Stop and remove everything:** ```bash docker compose down ```
:::
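To recap this page, one possible end-to-end session (outside Docker) looks like the sketch below; run the encoder in a separate terminal or in the background so it stays available across commands:

```bash
# Terminal 1: start the shared text-encoder service
kimodo_textencoder

# Terminal 2: generate a motion, then launch the interactive demo
kimodo_gen "A person walks forward." --model Kimodo-SOMA-RP-v1 --duration 5.0 --output output
kimodo_demo   # then open http://localhost:7860 in your browser
```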
================================================ FILE: docs/source/index.md ================================================ # Kimodo Documentation
*Scaling controllable human motion generation*
## Overview Kimodo is a **ki**nematic **mo**tion **d**iffusi**o**n model trained on a large-scale (700 hours) commercially-friendly optical motion capture dataset. The model generates high-quality 3D human and robot motions, and is controlled through text prompts and an extensive set of constraints such as full-body pose keyframes, end-effector positions/rotations, 2D paths, and 2D waypoints. See the [project page](https://research.nvidia.com/labs/sil/projects/kimodo/) for details. ## Highlights

- **Controlled Generation**: Text prompts combined with full-body, root, and end-effector constraints.
- **Human(oid) Support**: Model variations for both digital humans and humanoid robots.
- **Interactive Demo**: Timeline editing, real-time 3D visualization, and example presets.

## Quick links - [Installation](getting_started/installation.md) - [Quick Start](getting_started/quick_start.md) - [Command Line Interface](user_guide/cli.md) - [Interactive Demo](interactive_demo/index.md) - [Project Structure](project_structure.md) ```{toctree} :maxdepth: 3 :caption: Getting Started :hidden: getting_started/installation getting_started/quick_start ``` ```{toctree} :maxdepth: 2 :caption: User Guide :hidden: interactive_demo/index user_guide/cli user_guide/constraints user_guide/output_formats user_guide/motion_convert user_guide/seed_dataset user_guide/configuration ``` ```{toctree} :maxdepth: 2 :caption: Key Concepts :hidden: key_concepts/model key_concepts/limitations key_concepts/motion_representation key_concepts/constraints key_concepts/skeleton ``` ```{toctree} :maxdepth: 2 :caption: Benchmark :hidden: benchmark/introduction benchmark/pipeline benchmark/metrics benchmark/results ``` ```{toctree} :maxdepth: 2 :caption: Reference :hidden: project_structure project_info api_reference/index ``` ================================================ FILE: docs/source/interactive_demo/constraints.md ================================================ # Constraints Constraints guide the motion at specific frames or intervals. To learn about the types of constraints and the details of each, see the [constraints concepts](../key_concepts/constraints.md) and [constraints format](../user_guide/constraints.md) pages. ![Constraints panel](../_static/demo/constraints_panel.png) ![Editing mode](../_static/demo/editing_mode.png) The constraint panel allows you to configure constraints and constraint editing: - **Enter Editing Mode**: enable FK pose editing in the viewer. Gizmos will be displayed on joints that can be edited. If there is already a constraint on the timeline for the current frame, any pose editing will adjust that constraint; otherwise, you need to add a constraint on the timeline after adjusting the pose. - **Gizmo space**: whether to display the rotation gizmos in local or global joint space while editing - **Snap to Constraint**: will snap the current frame of motion to the constraint at that frame. This can be useful if a generated pose does not exactly meet the constraint and you want to continue editing the constraint. - **Reset Constraint**: does the opposite by snapping the pose back to the original generated motion from the constrained pose. - **Root 2D Options > Make Smooth Path**: if you have laid down root waypoint constraints, checking this box will turn the waypoints into a smoothed dense path constraint. If there is not a waypoint at the first and last frames of the motion, they will be automatically added, since Kimodo is only trained on full-sequence paths. - **Clear All Constraints**: clears all current constraints from the viewer and timeline. ================================================ FILE: docs/source/interactive_demo/examples.md ================================================ # Examples The Examples Tab within the settings panel contains several examples that highlight the key capabilities and potential workflows with Kimodo. Examples are included for the `Kimodo-SOMA-RP` and `Kimodo-G1-RP` models. ![Examples panel](../_static/demo/examples_panel.png) After choosing an example from the dropdown menu, click "Load Example" to load the example configuration into the viewer. The viewer will display the pre-generated motion along with the prompts and constraints on the timeline that were used to generate it.
All settings used to generate the motion are also loaded with the example (e.g., seed, classifier-free guidance settings), so you should be able to click "Generate" in the panel to recover the same result. Examples cover a variety of ways to use one or more text prompts along with kinematic constraints for generation. **Saving New Examples**: after you've generated a motion, you can save a new Example under the "Load/Save" tab of the Settings panel. You should immediately see the Examples dropdown update with your new saved example so it can be loaded in later. The examples walk through common workflows in the webapp; each workflow has its own section and an accompanying video. ================================================ FILE: docs/source/interactive_demo/export_results.md ================================================ # Saving/Loading The Load/Save and Exports panels allow saving generated results and loading previously generated results. ![Export panel](../_static/demo/exports_panel.png) - **Load/Save** - **Motion**: save the current motion in the [NPZ format](../user_guide/output_formats.md#kimodo-npz-format) to a specific path. Motion NPZs can also be loaded into the viewer from this panel. This is useful to load in motions generated with the CLI. - **Constraints**: save the current constraints in the [JSON format](../user_guide/constraints.md) to a specific path. Constraint JSON files can also be loaded into the viewer. - **Example**: allows saving a new example that encompasses the current motion, constraints, and all settings. This is useful for reloading previous work. If examples are saved to the demo examples directory, they will be loadable from the Examples dropdown menu; otherwise, you can load them through a file path in this menu. - **Exports** - **Screenshot**: save the current canvas as an image that can be downloaded through your browser - **Video**: record the current motion to a video that can be downloaded through your browser - **Motion**: save the current motion to a format of your choice depending on the loaded skeleton: - SOMA: `NPZ` or `BVH` - G1: `NPZ` or `CSV` - SMPL-X: `NPZ` or `AMASS NPZ` These formats are described in [output formats](../user_guide/output_formats.md). ================================================ FILE: docs/source/interactive_demo/generation.md ================================================ # Generation The most important panel is the "Generate" panel, which allows you to call Kimodo to generate one or more motions based on the prompts, constraints, and settings provided. ![Generate panel](../_static/demo/generate_panel.png) - **Num Samples**: the number of motions to generate based on the current settings. When multiple samples are generated, you _must_ choose a single sample by clicking the character in the viewer before editing constraints or generating new motion. - **SOMA Layer**: if using a `Kimodo-SOMA` model, this option will appear. It allows you to use the SOMA body layer to skin the character instead of using the SOMA rig. For details on the difference between the two, see the [Skeletons page](../key_concepts/skeleton.md#soma-default).
- **Seed**: random seed for repeatable generation - **Denoising steps**: number of steps to use with DDIM - **CFG Text/Constraint Weight**: the weights to use for classifier-free guidance - **Post-Processing**: whether to use foot skate cleanup and constraint post-optimization to improve motion after generation - **Root Margin**: if the skeleton root deviates more than this margin from a constraint, the post-processing will fix it ================================================ FILE: docs/source/interactive_demo/index.md ================================================ # Interactive Demo The web-based interactive demo provides an intuitive interface for generating motions with any of the Kimodo model variations. ![Demo Interface](../_static/overview.png) *Interactive demo interface built with [Viser](https://github.com/viser-project/viser)* ```{note} To see the demo in action, follow the [setup instructions](launching.md) below and launch it locally. After launching, open the demo in a web browser at http://127.0.0.1:7860 or use port forwarding if running on a server. ``` The demo provides a timeline-based interface for composing text prompts and constraints, with real-time 3D visualization. Here are some key features: - **Multiple Characters**: Supports generating with the SOMA, G1, and SMPL-X versions of Kimodo - **Text Prompts**: Enter one or more natural language descriptions of desired motions on the timeline - **Timeline Editor**: Add and edit keyframes and constrained intervals on multiple constraint tracks - **Constraint Types**: - Full-Body: Complete joint position constraints at specific frames - 2D Root: Define waypoints or full paths to follow on the ground plane - End-Effectors: Control hands and feet positions/rotations - **Constraint Editing**: Editing mode allows for re-posing of constraints or adjusting waypoints - **3D Visualization**: Real-time rendering of generated motions with skeleton and skinned mesh options - **Playback Controls**: Preview generated motions with adjustable playback speed - **Multiple Samples**: Generate and compare multiple motion variations - **Examples**: Load pre-existing examples to better understand Kimodo's capabilities - **Export**: Save constraints and generated motions for later use ## Quick Links - [Starting the Demo](launching.md) - [UI Overview](ui_overview.md) - [Examples](examples.md) ```{toctree} :maxdepth: 2 :hidden: launching ui_overview model_selection examples generation constraints export_results ``` ================================================ FILE: docs/source/interactive_demo/launching.md ================================================ # Running the Demo After following the installation [instructions](../getting_started/installation.md), the demo can be launched with the commands below. The demo runs in the web browser at [http://localhost:7860](http://localhost:7860).
If you run the demo on a remote server, set up SSH port forwarding so your local web browser can reach `http://localhost:7860` as if it were local. **Option 1: Add LocalForward to your SSH config** Edit (or create) your SSH config file (typically `~/.ssh/config`): ``` Host your-server-name HostName your.server.address User username LocalForward 7860 localhost:7860 ``` Then connect with: ``` ssh your-server-name ``` **Option 2: Use the SSH command-line directly** From your local machine, run: ``` ssh -N -L 7860:localhost:7860 username@your.server.address ``` This will forward your local port 7860 to the server's port 7860. After connecting, open [`http://localhost:7860`](http://localhost:7860) in your web browser. Replace `username` and `your.server.address` with your actual user and server info.
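Once connected, you can sanity-check the tunnel from your local machine before opening the browser (this assumes the demo is already running on the server):

```bash
# Should print HTTP response headers if the tunnel and the demo are both up
curl -sI http://localhost:7860
```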

If you will be restarting the demo frequently, we recommend first starting the text encoder service in the background, as detailed in the [quick start guide](../getting_started/quick_start.md#run-text-encoder-service). If the text encoder service is not running, the demo will automatically load the text encoder model. The demo will also automatically download the Kimodo model checkpoint on launch and whenever the selected model is changed in the UI. ## Launch from Command Line If you installed Kimodo as a package or from source, the demo can be started with: ```bash kimodo_demo ``` ## Launch with Docker If you installed with Docker, you can start the demo with: ```bash docker compose up demo ```
:::{dropdown} Additional Tips for Docker
You may find the following commands useful if running Kimodo within the Docker containers. In the example commands below, you can also replace `demo` by `text-encoder`: **Check logs:** ```bash docker compose logs demo ``` **Stop service:** ```bash docker compose stop demo ``` **Restart service:** ```bash docker compose restart demo ``` **Stop and remove everything:** ```bash docker compose down ```
:::
================================================ FILE: docs/source/interactive_demo/model_selection.md ================================================ # Model Selection Model selection allows choosing between the Kimodo models detailed in the [quick start guide](../getting_started/quick_start.md#overview-kimodo-models). The models determine which character is loaded in the scene and the possible export options. - **SOMA**: default human skeleton - **G1**: MuJoCo-compatible exports - **SMPL-X**: SMPL-X compatible outputs For details on each skeleton, see [Skeletons](../key_concepts/skeleton.md). *Model selection UI* ![Skeleton overview](../_static/skeletons/skeletons.png) ================================================ FILE: docs/source/interactive_demo/ui_overview.md ================================================ # UI Overview This page gives an overview of each of the main elements of the demo UI and how to use them. ![Demo Interface](../_static/overview.png) *An example scene within the demo webapp* ## Viewer ![Viewer](../_static/demo/viewer.png) The 3D viewer shows the currently generated motion. It supports skeleton or skinned mesh rendering, which is configurable in the "Visualize" panel. ### Camera - **Left-drag**: rotate - **Right-drag**: pan - **Scroll**: zoom ### Playback - **Space** to play/pause - **←/→** to step frames, or click the frame number. ## Timeline ![Timeline](../_static/demo/timeline.png) The timeline is where you: - add, edit, and delete **prompt segments** - add and delete **constraints** at frames or intervals and adjust timing ### Timeline Navigation - **Scroll Up/Down** in the timeline: move left/right - **Shift + Scroll** in the timeline: zoom in/out ### Prompts - **Double-Click** a text prompt to edit the text - **Click and Drag** the right edge of a prompt box to extend/shorten it (2-10 sec) - **Click Empty Space** to add a prompt - **Right-Click** a prompt to delete it ### Constraints Constraints can be added after generating for the first time when there is an active motion in the viewer: - **Click** in the timeline tracks (Full-Body / 2D root etc) to add a constraint of that type using the pose at that frame - **Ctrl/Cmd + Click + Drag** to add an interval constraint, or expand a keyframe into an interval - **Click + Drag** an existing constraint to move it to a different frame - **Right-Click** on a constraint to delete it - To **edit** a constraint: - Move playback to the target frame - Click **Enter Editing Mode** in the Constraints tab of the Settings Panel. Note you must exit editing mode before generating again. ## Settings Panel ![Panel](../_static/demo/panel.png) The settings panel includes: - model selection - loading examples - model parameter selection for generation and post-processing - parameters for constraint editing - motion loading and saving - visualization options Important settings panels are individually explained on subsequent pages.
================================================ FILE: docs/source/key_concepts/constraints.md ================================================ # Constraints Constraints are time-localized signals that steer the generated motion toward specific spatial goals while keeping the rest of the motion free for the model to resolve. You can combine constraints with text prompts to control trajectory, pose, and end-effectors. Constraints are most easily defined in the [interactive demo](../interactive_demo/constraints.md) and can be saved to the [JSON format](../user_guide/constraints.md). ![Overview diagram of constraint types on a timeline](../_static/constraints.png) ## Why Constraints? Constraints allow you to: - pin the character to a target pose or keyframe - guide a path on the ground while preserving natural motion - fix hands or feet at specific times (for example, touch or contact events) ## Constraint Types Kimodo is trained to excel at specific types of constraints. **Sparse root 2D waypoint**: ground-plane 2D waypoints that guide the global translation of the character. This constrains the 2D components of the smoothed root representation generated by Kimodo. **Dense root 2D path**: dense 2D path constraints that guide a continuous trajectory. This constrains the 2D components of the smoothed root representation generated by Kimodo. **Sparse full-body keyframe**: full-body pose targets at specific frames. Within the Kimodo motion representation, this constrains the smoothed root position and all body joint positions at a specific frame. **Sparse end-effector constraint**: hand or foot targets, while leaving the rest of the body flexible. This constrains the smoothed root position along with the specified end-effectors. For hands, this will constrain the wrist position and rotation along with the hand end position. For feet, it constrains the heel position and rotation along with the toe position. Kimodo is trained to support arbitrary subsets of end-effectors. **Foot contacts**: toe/heel contact patterns. While the model is trained to support this, it is not currently implemented in the demo UI or Python API. ```{note} For SOMA models, constraints may be authored or displayed on the full `somaskel77` skeleton, but Kimodo converts them to the reduced `somaskel30` representation before passing them to the model. See the [skeleton](./skeleton.md) section for more details. ``` ## Coordinate Space All constraint values are in a **Y-up** coordinate system with units in **meters**. The model expects constraints relative to a canonical origin where the root starts at XZ = (0, 0) at frame 0. The initial heading can be set via the `first_heading_angle` generation parameter (defaults to 0, facing +Z). See the [constraints JSON format](../user_guide/constraints.md#coordinate-space-and-units) for full details on each field. ## Time and Scope In our CLI and demo, constraints can be defined at: - **Single frames**: keyframe-style constraints - **Intervals**: guidance across a range of frames However, as described above, the model is trained to excel mostly at sparse keyframes, with dense keyframes usually only seen for root paths. See [best practices](./limitations.md) for more details. ## Post-Processing Since it is very challenging for a neural network to strictly adhere to constraints, the demo and CLI support motion post-processing to ensure motion _exactly_ hits constraints.
This is done through a lightweight optimization that smoothly adjusts joints while minimizing changes in acceleration and velocity. ================================================ FILE: docs/source/key_concepts/limitations.md ================================================ # Best Practices On this page, we summarize the best approaches to maximize Kimodo's capabilities in terms of prompting and constraints, and also summarize known limitations and failure cases. For additional context, please see the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf). ## Text Prompting - For best results, begin each prompt with "A person..." (e.g., "A person walks forward" or "A person jumps and waves"). This phrasing helps clarify the subject and intent of the motion, and is more closely aligned with the style of prompts used in the training data. The subject can also be stylized to better describe the motion, such as "An old person..." or "A drunk person..." - Keep each prompt focused on one or at most two behaviors. For long sequences of actions, split them into multiple prompts and generate in sequence. - It's best to use a medium level of detail when describing a motion. Prompts like "A person walks." are too short and vague, while very long prompts describing the detailed motion of each body part will be too much for the model to handle. Most training data is a middle ground between these two. We recommend looking at the prompts in the [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed) to get an idea of prompt granularity. - Kimodo is trained on a specific set of human behaviors. The training data tends to cover locomotion, gestures, everyday activities, common object interactions, videogame combat, dancing, and various styles including tired, angry, happy, sad, scared, drunk, injured, stealthy, old, and childlike. Prompts for actions outside of these categories will likely give bad results. For example, "A baseball player walks up to the plate and swings a bat" is not good, because Kimodo has not trained on baseball data. - When using multiple prompts (e.g., in the timeline UI), make sure each prompt has enough information on its own. For example, if prompt 1 is "A person is walking while carrying an object", then prompt 2 could be "A person walking carrying an object comes to a stop". If prompt 2 were instead "Then the person stops", the model will not have enough context for what happened previously and may generate poor-quality motions. ## Constraints - Avoid using constraints that contradict the given text prompt or other types of constraints. If you are having trouble with a tradeoff between constraint and text accuracy, try adjusting the [classifier-free guidance weights](../user_guide/configuration.md). - Except for dense 2D root paths, Kimodo is mainly trained to handle sparse temporal constraints. Kimodo will perform best when the number of constraints per constraint type is less than 20 keyframes. - When foot contact accuracy and hitting constraints are a high priority, make sure to enable [post-processing](./constraints.md#post-processing). ## Limitations - **Motion length:** Maximum generated motion duration is 10 sec per prompt - **Number of constraints:** The number of constrained frames per constraint type should be less than 20 (excluding the root path constraint) - **Overly long or complex prompts** can blur motion intent, especially when many distinct actions are packed into a single prompt.
- **Conflicting constraints:** can lead to artifacts or to constraints being ignored - **Multi-prompt sequences**: When generating motions with a sequence of prompts, each motion is generated one at a time. The second motion is conditioned on the last frames of the first, so the transition between prompts actually happens at the start of the second motion. This means the second prompt must devote some of its duration to performing a smooth transition, which may reduce the time available to realize the new prompt content fully. - **Post-processing**: The model by itself can generate foot skating and will not exactly hit constraints. Post-processing helps with this, but currently does not work well for the G1 robot skeleton. ================================================ FILE: docs/source/key_concepts/model.md ================================================ # Model Overview At a glance: - Input: text prompt + optional constraints. - Output: full-body motion sequence - Core Idea: denoise motion features with a two-stage transformer at each step. Kimodo is an explicit motion diffusion model that generates 3D human motion by denoising a sequence of skeleton poses. The model operates on a carefully designed motion representation that enables precise control over generated motion while minimizing common artifacts, such as floating and foot skating. The motion representation features a smoothed root that emulates paths drawn in practical animation tools, along with global joint rotations and positions amenable to sparse keyframe constraints. For full details, see the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf). ![Kimodo model architecture](../_static/arch.png) ## Diffusion Process At each step of the denoising process, the model takes in an embedding of the text prompt, a set of kinematic constraints, and the current noisy motion. Constraints are specified using the same motion representation as the input motion, and are used to overwrite the corresponding values in the noisy motion. Additionally, a mask indicating which elements are constrained is concatenated to the input motion. The goal is to predict a clean version of the input motion. ## Two-Stage Transformer Denoiser Given these inputs, the two-stage transformer denoiser predicts a clean motion that aligns with the text and constraints. The two-stage denoiser decomposes root and body motion prediction: the root denoiser first predicts global root motion, which is transformed into a local representation as input to the body denoiser. The final output is the concatenation of the two stages. ## Training Dataset A key component to effectively train Kimodo is the [Bones Rigplay](https://bones.studio/ai-datasets/) dataset, a large studio mocap dataset containing over 700 hours of production-quality human motion with corresponding text descriptions. The data covers locomotion, gestures, everyday activities, common object interactions, videogame combat, dancing, and various styles including tired, angry, happy, sad, scared, drunk, injured, stealthy, old, and childlike. ================================================ FILE: docs/source/key_concepts/motion_representation.md ================================================ # Motion Representation Kimodo uses a motion representation that combines a smoothed root representation with global joint positions, rotations, and various auxiliary features.
For full details, please refer to the [tech report](https://research.nvidia.com/labs/sil/projects/kimodo/assets/kimodo_tech_report.pdf). The representation is implemented in `kimodo/motion_rep/reps/kimodo_motionrep.py` and allows easily going to and from this feature representation. ## Coordinate System All motion features use a right-handed coordinate system with: - **Y up** - **+Z forward** ## Smoothed Root Representation We use a smoothed root trajectory for the global root position to make path-following constraints more natural and controllable. Smoothing removes high-frequency pelvis jitter while preserving overall motion direction, so 2D waypoints or paths drawn by users remain clean and easy to match during generation, while the pelvis can still move naturally around the smoothed curve. ![Comparison of smoothed root rep](../_static/smoothed_root.png) ## Pose Feature At each frame, the pose feature vector is the concatenation of: - **Smooth root position** (`smooth_root_pos`, 3): Smoothed pelvis/root position. The x/z components track ground-plane motion and y stores height. - **Global root heading** (`global_root_heading`, 2): `[cos(theta), sin(theta)]` heading direction of the root. - **Local joint positions** (`local_joints_positions`, `J x 3`): Joint positions in a pelvis-relative space with the smoothed root x/z offset applied. - **Global joint rotations** (`global_rot_data`, `J x 6`): 6D rotation representation of each joint's global orientation. - **Joint velocities** (`velocities`, `J x 3`): Global joint velocities. - **Foot contacts** (`foot_contacts`, 4): Binary contact indicators for the left/right foot contact points. Concatenated, this gives `9 + 12J` values per frame for a skeleton with `J` joints (e.g., 369 for the reduced 30-joint SOMA skeleton used internally by the model). ================================================ FILE: docs/source/key_concepts/skeleton.md ================================================ # Skeletons Different versions of Kimodo support different skeletons (characters). A separate model is trained for each skeleton, with the currently available options being [SOMA](https://github.com/NVlabs/SOMA-X), [G1](https://github.com/unitreerobotics/unitree_mujoco/tree/main/unitree_robots/g1), and [SMPL-X](https://github.com/vchoutas/smplx). The skeletons discussed on this page are defined in `kimodo/skeleton/definitions.py`. ![Skeleton overview](../_static/skeletons/skeletons.png) ## SOMA (default) SOMA is the default skeleton used for Kimodo. It is based on the [SOMA body model](https://github.com/NVlabs/SOMA-X), which is also used in the [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed). Kimodo uses two closely related SOMA skeleton definitions: - **`somaskel30`**: the reduced 30-joint skeleton used internally by the model and by the core SOMA constraint formulation. It removes most finger and hand detail. - **`somaskel77`**: the full 77-joint SOMA skeleton used for public-facing visualization and SOMA motion exports. In practice, Kimodo predicts SOMA motions on `somaskel30` and converts them to `somaskel77` when returning or visualizing results in the demo. Older assets and examples may still be stored on `somaskel30`, and the tooling keeps backward compatibility with those files. Note that all training data for Kimodo is on a uniform skeleton proportion corresponding to one single set of identity parameters for the SOMA body model. !["SOMA skeletons"](../_static/skeletons/soma_skels.png) Outputs on the SOMA skeleton can be visualized in two ways. The first is by articulating a fixed SOMA rig and doing traditional skinning (corresponds to `kimodo/viz/soma_skin.py` in the codebase).
Alternatively, we can take generated joint rotations and feed them through the SOMA layer with the set of identity parameters that correspond to the body shape of our uniform skeleton. An example of this is in the codebase at `kimodo/viz/soma_layer_skin.py`, which uses the identity parameters defined from `kimodo/assets/skeletons/somaskel30/soma_base_fit_mhr_params.npz` (the same ones from BONES-SEED data). Due to peculiarities with data processing, using the SOMA rig and the SOMA layer gives very slightly different visualization results, with the SOMA rig better reflecting the data that Kimodo was trained on. ## Unitree G1 The G1 skeleton targets MuJoCo-compatible exports and robotics workflows. The version that Kimodo uses is a 34-joint skeleton, with extra joints added for the toes to ease learning. When generated motions are exported to the MuJoCo `qpos` CSV format, these joints are removed to be compatible with downstream applications. *G1 skeleton* ## SMPL-X This aligns with the SMPL-X model and supports AMASS-style exports. It uses 22 joints corresponding to only the body joints. This option is useful for compatibility with SMPL-X pipelines or downstream tools expecting AMASS parameters, but it is **not** the recommended Kimodo model to use since generated motions may display particularly severe retargeting artifacts. *SMPL-X skeleton* ================================================ FILE: docs/source/project_info.md ================================================ # Project Information ## Citation If you use this code in your research, please cite: ```bibtex @article{Kimodo2026, title={Kimodo: Scaling Controllable Human Motion Generation}, author={Rempe, Davis and Petrovich, Mathis and Yuan, Ye and Zhang, Haotian and Peng, Xue Bin and Jiang, Yifeng and Wang, Tingwu and Iqbal, Umar and Minor, David and de Ruyter, Michael and Li, Jiefeng and Tessler, Chen and Lim, Edy and Jeong, Eugene and Wu, Sam and Hassani, Ehsan and Huang, Michael and Yu, Jin-Bey and Chung, Chaeyeon and Song, Lina and Dionne, Olivier and Kautz, Jan and Yuen, Simon and Fidler, Sanja}, journal={arXiv:2603.15546}, year={2026} } ``` ## License The codebase is licensed under Apache-2.0. Please see the codebase for full license text. Note that model checkpoints are licensed separately as indicated on the HuggingFace download pages. ## Acknowledgments This project builds upon several excellent open-source projects: - [Viser](https://github.com/nerfstudio-project/viser) for 3D visualization - [LLM2Vec](https://github.com/McGill-NLP/llm2vec) for text encoding ## Contact For questions or issues, please open an issue on this repository or reach out directly to the authors.
================================================ FILE: docs/source/project_structure.md ================================================ # Project Structure ```text kimodo/ ├── kimodo/ # Main Python package │ ├── model/ # Model architecture and loading │ │ ├── kimodo_model.py # Kimodo diffusion model wrapper │ │ ├── twostage_denoiser.py # Two-stage denoising architecture │ │ ├── backbone.py # Transformer encoder backbone │ │ ├── diffusion.py # Diffusion process │ │ ├── cfg.py # Classifier-free guidance │ │ ├── common.py # Shared model utilities │ │ ├── load_model.py # Model loading and registry lookup │ │ ├── loading.py # Checkpoint loading utilities │ │ ├── registry.py # Model registry (skeleton, checkpoint URLs) │ │ ├── text_encoder_api.py # Text encoder API client │ │ ├── tmr.py # TMR compatibility │ │ └── llm2vec/ # LLM-based text encoder │ ├── motion_rep/ # Motion representation │ │ ├── reps/ # Skeleton-specific motion reps │ │ │ ├── base.py # Base motion rep types │ │ │ ├── kimodo_motionrep.py │ │ │ └── tmr_motionrep.py │ │ ├── conditioning.py # Conditioning (text, constraints) │ │ ├── feature_utils.py # Feature extraction │ │ ├── feet.py # Foot contact / smoothing │ │ ├── smooth_root.py # Smooth root representation │ │ └── stats.py # Normalization statistics │ ├── skeleton/ # Skeleton definitions and kinematics │ │ ├── definitions.py # Skeleton topology (joints, chains) │ │ ├── registry.py # Skeleton registry │ │ ├── base.py # Base skeleton types │ │ ├── kinematics.py # Forward kinematics │ │ ├── transforms.py # Rotation/transform utilities │ │ └── bvh.py # BVH I/O │ ├── viz/ # Visualization │ │ ├── scene.py # 3D scene setup │ │ ├── playback.py # Timeline / motion playback │ │ ├── viser_utils.py # Viser 3D helpers │ │ ├── gui.py # Demo GUI components │ │ ├── constraint_ui.py # Constraint editing UI │ │ ├── coords.py # Coordinate frames │ │ ├── soma_skin.py # SOMA character skinning │ │ ├── soma_layer_skin.py # SOMA layer-based skinning │ │ ├── smplx_skin.py # SMPL-X skinning │ │ └── g1_rig.py # G1 robot rig │ ├── demo/ # Interactive web demo │ │ ├── app.py # Demo entry (Gradio / Viser) │ │ ├── config.py # Demo configuration │ │ ├── state.py # Application state │ │ ├── ui.py # UI layout and callbacks │ │ ├── generation.py # Generation pipeline for demo │ │ ├── embedding_cache.py # Cached text embeddings │ │ ├── queue_manager.py # Request queue for demo │ │ └── __main__.py # Demo run as module │ ├── exports/ # Motion I/O and format conversion │ │ ├── motion_io.py # Kimodo motion dict helpers (load, save, resample) │ │ ├── motion_convert_lib.py # Library API for format conversion │ │ ├── motion_formats.py # Format detection and FPS resolution │ │ ├── bvh.py # SOMA BVH read/write │ │ ├── mujoco.py # G1 MuJoCo qpos conversion │ │ └── smplx.py # AMASS / SMPL-X conversion │ ├── metrics/ # Evaluation metrics │ │ ├── base.py # Metric base classes │ │ ├── foot_skate.py # Foot skate metrics │ │ ├── constraints.py # Constraint metrics │ │ └── tmr.py # TMR-based metrics │ ├── scripts/ # CLI and helper scripts │ │ ├── generate.py # CLI for motion synthesis (kimodo_gen) │ │ ├── motion_convert.py # CLI for format conversion (kimodo_convert) │ │ ├── run_text_encoder_server.py # Text encoder server (kimodo_textencoder) │ │ ├── gradio_theme.py # Gradio theme for demo │ │ ├── lock_requirements.py # Dependency locking │ │ └── mujoco_load.py # MuJoCo g1 csv loading │ ├── assets/ # Package data (shipped with package) │ │ ├── demo/ # Demo examples and config │ │ └── skeletons/ # Skeleton assets │ ├── 
constraints.py # Constraint definitions and handling │ ├── geometry.py # Geometric utilities │ ├── postprocess.py # Post-processing (e.g. MotionCorrection) │ ├── meta.py # Motion metadata │ ├── sanitize.py # Input sanitization │ ├── assets.py # Asset path resolution │ └── tools.py # General utilities ├── benchmark/ # Evaluation pipeline scripts │ ├── create_benchmark.py # Step 1: Build test suite from SEED + metadata │ ├── generate_eval.py # Step 2: Generate motions for test suite │ ├── embed_folder.py # Step 3: Embed motions and text with TMR │ ├── evaluate_folder.py # Step 4: Compute metrics for test cases │ └── parse_folder.py # Step 5: Aggregate and display results ├── MotionCorrection/ # Optional C++/Python post-processing │ ├── python/motion_correction/ # Python bindings │ └── src/cpp/ # C++ implementation ├── docs/ # Documentation (Sphinx) │ └── source/ # RST/MD sources ├── assets/ # Repo-level assets (banner, screenshots) ├── pyproject.toml # Package config and entry points ├── setup.py # Setuptools entry (if needed) ├── Dockerfile # Container image for demo ├── docker-compose.yaml # Docker Compose for demo + text encoder └── README.md
```

Entry points (from `pyproject.toml`):

- **`kimodo_gen`** — command-line motion synthesis (`kimodo.scripts.generate:main`)
- **`kimodo_demo`** — interactive web demo (`kimodo.demo:main`)
- **`kimodo_convert`** — motion format conversion (`kimodo.scripts.motion_convert:main`)
- **`kimodo_textencoder`** — text encoder server (`kimodo.scripts.run_text_encoder_server:main`)

================================================
FILE: docs/source/user_guide/cli.md
================================================
# Command-Line Interface

The primary CLI entrypoint is the `kimodo_gen` command. This corresponds to the script located at `kimodo/scripts/generate.py`, so you can equivalently use `python -m kimodo.scripts.generate`.

**Docker Usage**: If you set up Kimodo with Docker, you can instead run generation inside the Docker container, replacing `kimodo_gen XXX` with `docker compose run --rm demo kimodo_gen XXX`. If you will be running generation multiple times, it is better to start the `demo` container (e.g., in another terminal or in the background) and then run commands inside it with `docker compose exec demo kimodo_gen XXX`.

**Single Prompt Generation:**

```bash
kimodo_gen "A person walks forward." \
    --model Kimodo-SOMA-RP-v1 \
    --duration 5.0 \
    --output output
```

The `--model` option selects the Kimodo model checkpoint to generate with; if it is not provided, `Kimodo-SOMA-RP-v1` is used by default. The output motion will be saved using the stem name given by `--output` in the Kimodo [output format](../user_guide/output_formats.md). If generating with a G1 or SMPL-X model, you can also save to other output formats like the MuJoCo qpos CSV and AMASS NPZ formats. For **offline conversion** between Kimodo NPZ, AMASS NPZ, SOMA BVH, and G1 MuJoCo CSV after generating, use `kimodo_convert` (see [Motion format conversion](motion_convert.md)).

**Multi-Prompt Generation:**

Generating from a sequence of text prompts can be achieved by using multiple sentences separated by periods, with corresponding durations:

```bash
kimodo_gen "A person walks forward. A person is walking backwards." \
    --duration "5.0 4.0"
```

This command will use Kimodo to generate each prompt in sequence, with constraints added to the beginning of the second prompt to ensure continuity with the first generated motion. You can control how many frames are used to blend consecutive motions with the `--num_transition_frames` argument, as sketched below.
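To make the effect of `--num_transition_frames` concrete, here is a minimal, hypothetical sketch of blending two consecutively generated clips. It assumes a simple linear crossfade over `[T, D]` feature arrays; `blend_transition`, `prev_clip`, and `next_clip` are illustrative names, not the actual pipeline API, which may blend differently (e.g., on rotations):

```python
import numpy as np

def blend_transition(prev_clip: np.ndarray, next_clip: np.ndarray,
                     num_transition_frames: int = 5) -> np.ndarray:
    """Crossfade the tail of `prev_clip` into the head of `next_clip`."""
    k = num_transition_frames
    # Linear weights running from 1.0 (all previous clip) to 0.0 (all next clip).
    w = np.linspace(1.0, 0.0, k)[:, None]
    blended = w * prev_clip[-k:] + (1.0 - w) * next_clip[:k]
    # Previous clip minus its tail, the blended window, then the rest of the next clip.
    return np.concatenate([prev_clip[:-k], blended, next_clip[k:]], axis=0)
```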
**Single Prompt with Constraints:**

Generation can be constrained by providing a constraints JSON file (see the [Constraints Format Definition](constraints.md)).

```bash
kimodo_gen "A person walks forward and picks something up from the ground." \
    --model Kimodo-SOMA-RP-v1 \
    --duration 5.0 \
    --constraints kimodo/assets/demo/examples/kimodo-soma-rp/03_full_body_keyframes/constraints.json
```

Constraint files can be created and saved from the interactive demo, or manually defined following the [constraints format guide](constraints.md).

## Output Formats

For full details on output formats, see [this page](output_formats.md). To convert between these formats offline, see [Motion format conversion](motion_convert.md) (`kimodo_convert`).

CLI generation uses a single **output stem** (`--output`) for all formats (NPZ, AMASS NPZ, CSV, and BVH). It can write either **one file** or **a folder of files**, depending on the number of samples:

- **One sample** (`--num_samples 1`): writes a single file per format at the stem (e.g. `--output test` → `test.npz`, `test.csv`). No folder is created. For SMPLX, AMASS is written to `test_amass.npz`.
- **Multiple samples**: creates a folder with that stem and writes one file per sample with suffixes `_00`, `_01`, etc. (e.g. `--output test` → `test/test_00.npz`, ...).

Use the `--bvh` flag to also export BVH (SOMA only) to the same stem.

### Output Rest Pose

For SOMA-based Kimodo models, motions can be exported with respect to two different rest poses. The default rest pose, which is always used by the `NPZ` format, is a standard T-pose consistent with the canonical T-pose of the SOMA model. For `BVH` outputs, the default rest pose is a non-standard pose, but it is consistent with the BVH format of the [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed). To output a `BVH` file with the standard T-pose as the rest pose, you can use the `--bvh_standard_tpose` option. The standard T-pose used by Kimodo is available as a BVH file in the [repo assets](https://github.com/nv-tlabs/kimodo/tree/main/kimodo/assets/skeletons/somaskel77).

## Visualizing Generated Motions

Motions generated with the CLI can be visualized in the demo UI. To do this, under "Load/Save" > "Motion", type in the path of the generated output npz file, then click "Load Motion" to load it into the viewer. If you used constraints when generating, those can also be loaded in a similar way.

## Arguments

To see all available flags, run `kimodo_gen --help`. They are:

- `prompt`: Text description of the desired motion (required)
- `--model`: Model name to use (default: `Kimodo-SOMA-RP-v1`; options are the models in [this table](../getting_started/quick_start.md#overview-kimodo-models))
- `--duration`: Motion duration in seconds (default: `5.0`). For multiple prompts, pass space-separated durations in a string.
- `--diffusion_steps`: Number of denoising steps (default: `100`)
- `--num_samples`: Number of motion variations to generate (default: `1`)
- `--num_transition_frames`: Frames used to blend between prompts (default: `5`)
- `--constraints`: Path to a JSON file containing constraints
- `--output`: Output stem name (default: `output`). Used for all formats (NPZ, AMASS NPZ, CSV, BVH). With one sample, writes a single file per format (e.g. `test.npz`, `test.csv`). With multiple samples, creates a folder and writes `test_00.npz`, `test_01.npz`, … inside it. For SMPLX with one sample, AMASS is written to `stem_amass.npz` so it does not overwrite the main NPZ.
- `--save_example_dir`: If given, saves outputs to an "example" directory structure that can be loaded in the Kimodo demo.
- `--bvh`: Optional flag. When set, also export BVH (SOMA models only) using the same stem as `--output`.
- `--bvh_standard_tpose`: If exporting BVH, export with the rest pose being the standard T-pose rather than the rest pose consistent with the BONES-SEED dataset.
- `--seed`: Seed for reproducible results
- `--no-postprocess`: Disable post-processing (includes foot skate cleanup and constraint optimization)
- `--input_folder`: Folder containing `meta.json` and optional `constraints.json`. If set, generation settings are loaded from `meta.json`. These are found in demo example folders.
- `--cfg_type`: Classifier-free guidance mode: `nocfg`, `regular`, or `separated` (the custom mode with independent text and constraint scales). See {ref}`Classifier-free guidance (details) <classifier-free-guidance-cfg>` below.
- `--cfg_weight`: One float for `regular` CFG, or two floats `[text_weight, constraint_weight]` for `separated` CFG. If you pass only weights (no `--cfg_type`), one value implies `regular` and two imply `separated`. Not used with `nocfg`.

:::{dropdown} Classifier-free guidance (CFG)
:name: classifier-free-guidance-cfg

The CLI mirrors the Python API in [Generation parameters](configuration.md): Kimodo supports standard CFG (`regular`) and a **separated** variant with two scales—text vs. constraints—which is the usual setting in this project.

**Rules:**

- `nocfg`: no weights; do not pass `--cfg_weight`.
- `regular`: pass exactly one value after `--cfg_weight`.
- `separated`: pass exactly two values after `--cfg_weight`.

If you pass **`--cfg_type` or `--cfg_weight` on the command line**, those values override any `cfg` block in `meta.json` when using `--input_folder`. If you omit both flags, `meta.json` may still supply CFG via `cfg.enabled`, `cfg.text_weight`, and `cfg.constraint_weight` (same shape as the interactive demo examples). If there is no CLI CFG and no `cfg` in meta, the model uses its built-in defaults.

Examples:

```bash
# No classifier-free guidance
kimodo_gen "A person walks." --cfg_type nocfg

# Standard CFG (single scale)
kimodo_gen "A person walks." --cfg_type regular --cfg_weight 2.5

# Separated CFG (text scale, then constraint scale)
kimodo_gen "A person walks." --cfg_type separated --cfg_weight 2.0 1.5

# Infer mode from arity: one float -> regular; two floats -> separated
kimodo_gen "A person walks." --cfg_weight 2.0 2.0
```

:::

## Python API

The `kimodo/scripts/generate.py` script is a good place to start familiarizing yourself with the Python API of Kimodo if you'd like to use it directly. The full model API is detailed in the [API documentation](../api_reference/index.rst). If you want to use Kimodo in another project, you can interact with it like this:

```python
from kimodo import load_model

model = load_model("kimodo-soma-rp", device="cuda")
output = model(
    prompt="A person jumps",
    num_frames=150,
    num_denoising_steps=100,
)
```

================================================
FILE: docs/source/user_guide/configuration.md
================================================
# Generation Parameters

In the demo UI, the command-line tool (`kimodo_gen` / `python -m kimodo.scripts.generate`), and the low-level Python API, Kimodo allows some advanced configuration for motion generation.

## Classifier-Free Guidance

Control the strength of text and constraint guidance:

```python
output = model(
    prompt="A person jumps",
    num_frames=150,
    cfg_weight=[2.0, 2.0],  # [text_weight, constraint_weight]
    cfg_type="separated",   # Options: "nocfg", "regular", "separated"
    num_denoising_steps=100,
)
```

These settings are helpful when there is a tradeoff between following the prompt and hitting constraints. The CFG options are:

- `cfg_type="nocfg"`: No guidance (faster, less controllable)
- `cfg_type="regular"`: "Standard" classifier-free guidance
  - Equation: `out_uncond + w * (out_text_and_constraint - out_uncond)`
- `cfg_type="separated"`: Separate weights for text and constraints
  - Equation: `out_uncond + w_text * (out_text - out_uncond) + w_constraint * (out_constraint - out_uncond)`
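As a concrete reading of these equations, here is a schematic sketch of how the two CFG modes combine denoiser outputs. The `out_*` arrays stand for model outputs under different conditioning; the names and the `nocfg` behavior (simply returning the fully conditioned output) are illustrative assumptions, not the actual internal API:

```python
import numpy as np

def apply_cfg(out_uncond, out_text, out_constraint, out_text_and_constraint,
              cfg_type="separated", cfg_weight=(2.0, 2.0)):
    """Combine denoiser outputs following the CFG equations above."""
    if cfg_type == "nocfg":
        # No guidance: use the conditioned output as-is (assumed behavior).
        return out_text_and_constraint
    if cfg_type == "regular":
        w = cfg_weight if np.isscalar(cfg_weight) else cfg_weight[0]
        return out_uncond + w * (out_text_and_constraint - out_uncond)
    if cfg_type == "separated":
        w_text, w_constraint = cfg_weight
        return (out_uncond
                + w_text * (out_text - out_uncond)
                + w_constraint * (out_constraint - out_uncond))
    raise ValueError(f"Unknown cfg_type: {cfg_type!r}")
```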
### CLI

The same options are available from the command line as `--cfg_type` and `--cfg_weight`. See the {ref}`CLI user guide (CFG) <classifier-free-guidance-cfg>` for examples, validation rules, and how `meta.json` interacts with explicit flags when using `--input_folder`.

## Denoising Steps

The number of denoising steps used in DDIM sampling controls the speed vs. quality trade-off:

- Fewer steps (50-100): Faster inference, slightly lower quality
- More steps (100-200): Higher quality, slower inference

================================================
FILE: docs/source/user_guide/constraints.md
================================================
# Constraints JSON Format

The `--constraints` flag in the CLI expects a JSON file containing a list of constraint objects. It is easiest to look at the examples provided with the demo to see how these are formatted; they can be found for various model types in `kimodo/assets/demo/examples`.

> Tip: the easiest way to get a valid constraints file is to create constraints in the interactive demo and click on `Save Constraints`.

## High-Level Structure

- The file is a JSON array: `[{...}, {...}, ...]`
- Each element is an object with at least:
  - `type` (string): one of `root2d`, `fullbody`, `left-hand`, `right-hand`, `left-foot`, `right-foot`, `end-effector`
  - `frame_indices` (array of integers): 0-based frame indices within the generated clip.

```{note}
For SOMA models, constraints may be authored or displayed on the full `somaskel77` skeleton, but Kimodo converts them to the reduced `somaskel30` representation before passing them to the model. See the [skeleton](../key_concepts/skeleton.md) section for more details.
```

## Coordinate Space and Units

All spatial values in constraints use the same coordinate system as Kimodo's internal motion representation:

- **Axes**: **Y-up**, with locomotion on the **XZ ground plane**. The Y axis points up; X and Z span the horizontal ground plane.
- **Units**: **Meters**. Joint positions, root translations, and 2D root coordinates are all in meters.

### Canonicalization

During training, every motion is *canonicalized* so that the (smoothed) root starts at the XZ origin `(0, 0)` at frame 0. The initial body heading (facing direction) is randomly rotated and passed to the model as an explicit input (`first_heading_angle`), so the model is robust to arbitrary initial orientations. At inference, constraints should be authored **relative to this canonical origin**:

- `smooth_root_2d` values at frame 0 should be at `(0, 0)`, with subsequent frames expressing displacement from there.
- `root_positions` XZ components follow the same convention; Y is the **absolute hip height above the ground** (typically ~0.9 m for a standing pose, lower for crouching/sitting).
- `first_heading_angle` (a generation parameter, not part of the constraints JSON) defaults to `0.0` radians (facing +Z) but can be set to any value to change the initial facing direction.

### Field-specific notes

| Field | Space | Notes |
|-------|-------|-------|
| `smooth_root_2d` | `[x, z]` ground plane (meters) | Relative to the canonical origin. |
| `root_positions` | `[x, y, z]` (meters) | Y is absolute hip height above ground. XZ relative to canonical origin. |
| `global_root_heading` | `[cos(θ), sin(θ)]` | **Not** a raw radian value — must be a 2-element cosine/sine pair per frame (i.e. the heading direction vector). |
| `local_joints_rot` | axis-angle (radians) | Local joint rotations in the skeleton's rest-pose frame. |

### Constraints not at frame 0

Adding a constraint at frame 0 is **not** required. If the first constrained frame is later in the sequence (e.g. frame 45), Kimodo generates the initial frames freely from its learned distribution, starting near XZ = (0, 0) with the heading set by `first_heading_angle`. The constraint just needs to be reachable from that starting configuration given the text prompt and motion duration.

## Constraint Types

Depending on `type`, additional fields are required or optional. All numeric arrays are plain nested JSON lists. In the following definitions, `T` is the number of constrained frames (i.e., the number of `frame_indices`) and `J` is the number of skeleton joints.

### `root2d`

This captures 2D root waypoints and 2D root paths. It requires:

- `smooth_root_2d` (array shaped `[T, 2]`): Smoothed root positions `[x, z]` on the ground plane at the given `frame_indices`.

and optionally:

- `global_root_heading` (array shaped `[T, 2]`): Global root heading direction `[cos, sin]` at the given `frame_indices`.

### `fullbody`

This captures full-body keyframe constraints on joint positions. It includes:

- `local_joints_rot` (array shaped `[T, J, 3]`): Per-frame, per-joint **axis-angle** local rotations (radians). Constraint joint positions will be derived from these.
- `root_positions` (array shaped `[T, 3]`): Root (hips) translation `[x, y, z]`.
- `smooth_root_2d` (optional; array shaped `[T, 2]`): Smoothed root positions `[x, z]`. If omitted, it is taken as the `[x, z]` components of `root_positions`.

Note that `local_joints_rot` will not be explicitly constrained; the constraint is on the joint positions that result from FK with the given joint rotations.

### `left-hand` / `right-hand` / `left-foot` / `right-foot`

These capture end-effector constraints on the hand/foot joint positions and global rotations. They use the same fields as `fullbody`; however, under the hood they only affect the corresponding end-effectors and hips. Each of these types is a shorthand for `end-effector` with pre-set joint names.

### `end-effector`

A general end-effector constraint that requires an additional field:

- `joint_names` (array of strings): Which end-effectors to constrain (e.g. `["left_hand"]`, `["right_foot", "left_foot"]`). Available names depend on the skeleton; see the skeleton's `expand_joint_names()` for the full mapping.

Otherwise it uses the same fields as `fullbody` (`local_joints_rot`, `root_positions`, optional `smooth_root_2d`).

## Examples

### Root 2D waypoints

```json
[
  {
    "type": "root2d",
    "frame_indices": [0, 30, 60],
    "smooth_root_2d": [[0.0, 0.0], [0.5, 0.0], [1.0, 0.1]]
  }
]
```

### Full-body keyframe

```json
[
  {
    "type": "fullbody",
    "frame_indices": [60],
    "root_positions": [[0.0, 0.96, 1.5]],
    "local_joints_rot": [[[0.0, 0.0, 0.0], "... one [3] per joint ..."]]
  }
]
```

Here `root_positions` places the hips at x=0, y=0.96 m (standing height), z=1.5 m forward from the origin. `local_joints_rot` is a `[T, J, 3]` array of axis-angle rotations for every joint in the skeleton.
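Constraint files can also be authored programmatically. Below is a small sketch that writes the `root2d` example above to disk, including the optional heading constraint encoded as the `[cos, sin]` pairs described in the field notes (the heading angles here are arbitrary illustration values):

```python
import json
import math

frames = [0, 30, 60]
# Desired heading angle (radians) at each constrained frame.
headings = [0.0, 0.0, math.pi / 6]

constraints = [
    {
        "type": "root2d",
        "frame_indices": frames,
        # Waypoints on the XZ ground plane (meters), relative to the canonical origin.
        "smooth_root_2d": [[0.0, 0.0], [0.5, 0.0], [1.0, 0.1]],
        # One [cos, sin] heading direction pair per constrained frame.
        "global_root_heading": [[math.cos(a), math.sin(a)] for a in headings],
    }
]

with open("constraints.json", "w") as f:
    json.dump(constraints, f, indent=2)
```

The resulting file can be passed directly to `kimodo_gen ... --constraints constraints.json`.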
================================================
FILE: docs/source/user_guide/motion_convert.md
================================================
# Motion Format Conversion

The `kimodo_convert` command converts between the formats described in [Output formats](output_formats.md): **Kimodo NPZ**, **AMASS NPZ** (SMPL-X), **SOMA BVH**, and **G1 MuJoCo CSV**.

## Frame rate (30 Hz Kimodo NPZ)

Any conversion **to Kimodo NPZ** (from AMASS, SOMA BVH, or G1 CSV) **writes motion at 30 Hz**, matching Kimodo’s common generation rate. If the detected source rate differs, the tool **resamples** along time, then derived channels (contacts, smooth root, heading) are recomputed via forward kinematics. If resampling is required, a **warning** is emitted with the assumed source rate, input/output frame counts, and a reminder that `--source-fps` sets the **source** rate if autodetection is wrong. When the source is already ~30 Hz with the same frame count, no warning is shown (motion is only re-derived via FK for consistency).
### Resampling strategy details

The resampler picks one of two strategies based on the ratio `source_fps / target_fps`:

- **Integer-ratio fast path** — When the ratio is close to an integer ≥ 2 (within a tolerance of 0.05), the resampler simply takes every *step*-th frame (`frames[::step]`). For example, 120 Hz → 30 Hz has ratio 4, so every 4th frame is kept. This is exact and very fast.
- **Interpolation fallback** — Otherwise, the output timeline is linearly spaced over the input range. Root positions are linearly interpolated, and local joint rotations are interpolated via quaternion slerp. This handles arbitrary rate conversions (e.g. 50 Hz → 30 Hz).

In both cases, `complete_motion_dict` is re-run at the target rate so that all derived channels (velocities, foot contacts, heading, smooth root) stay consistent with the new frame spacing.
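For illustration, here is a minimal re-implementation of this two-strategy resampler, assuming `[T, 3]` root positions and `[T, J, 4]` local quaternions in `xyzw` order (the function name and array layout are assumptions; the real converter also re-runs `complete_motion_dict` afterwards):

```python
import numpy as np
from scipy.spatial.transform import Rotation, Slerp

def resample_motion(root_pos, local_quats, source_fps, target_fps=30.0, tol=0.05):
    """Resample root positions and per-joint local rotations to `target_fps`."""
    ratio = source_fps / target_fps
    step = round(ratio)
    # Integer-ratio fast path: keep every `step`-th frame (e.g. 120 Hz -> 30 Hz).
    if step >= 2 and abs(ratio - step) < tol:
        return root_pos[::step], local_quats[::step]

    # Interpolation fallback: linearly spaced output timeline over the input range.
    num_in = root_pos.shape[0]
    t_in = np.arange(num_in) / source_fps
    num_out = int(round((num_in - 1) * target_fps / source_fps)) + 1
    t_out = np.linspace(t_in[0], t_in[-1], num_out)

    # Root positions: linear interpolation per axis.
    new_pos = np.stack(
        [np.interp(t_out, t_in, root_pos[:, d]) for d in range(3)], axis=-1
    )
    # Local rotations: quaternion slerp per joint.
    num_joints = local_quats.shape[1]
    new_quats = np.empty((num_out, num_joints, 4))
    for j in range(num_joints):
        slerp = Slerp(t_in, Rotation.from_quat(local_quats[:, j]))
        new_quats[:, j] = slerp(t_out).as_quat()
    return new_pos, new_quats
```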
## Usage ```bash kimodo_convert INPUT OUTPUT [options] ``` Formats are inferred from file extensions and (for `.npz`) from file contents. You can override with `--from` and `--to`. ### Supported conversions | From | To | Notes | |------|-----|--------| | AMASS `.npz` | Kimodo `.npz` | SMPL-X, 22 joints. Uses `--z-up` by default (same as Kimodo’s AMASS export). | | Kimodo `.npz` | AMASS `.npz` | Requires `local_rot_mats` with 22 joints (SMPL-X). | | SOMA `.bvh` | Kimodo `.npz` | Expects a **Kimodo-exported** SOMA BVH (same hierarchy as `save_motion_bvh`). If the BVH uses the standard T-pose as rest pose, pass in `--bvh_standard_tpose`. | | Kimodo `.npz` | SOMA `.bvh` | Accepts 77 joints (SOMA full) or 30 joints (somaskel30, auto-expanded to 77 with relaxed-hand rest poses). If you want the output BVH to use the standard T-pose as rest pose, pass in `--bvh_standard_tpose`. | | G1 `.csv` | Kimodo `.npz` | Rows of shape `(36,)` = root xyz + root quat + 29 joint angles (see [output_formats](output_formats.md#csv-format-for-kimodo-g1)). | | Kimodo `.npz` | G1 `.csv` | Requires 34 joints (G1). | ### Common options - **`--source-fps`**: Source motion frame rate in Hz (used before resampling to 30 Hz for Kimodo NPZ). If omitted, the tool auto-detects from `mocap_frame_rate` (AMASS), `Frame Time` (BVH), or defaults to **30** Hz. The legacy `--fps` alias is still accepted for backward compatibility. - **`--no-z-up`**: For AMASS, disable the Y-up ↔ Z-up transform (treat data as already in Kimodo Y-up, +Z forward). - **`--mujoco-rest-zero`**: For G1 CSV, match the `mujoco_rest_zero` flag used when the CSV was written (see `MujocoQposConverter.dict_to_qpos`). - **`--bvh_standard_tpose`**: If input or output is BVH: the BVH file uses the standard T-pose as its rest pose instead of the BONES-SEED rest pose. ### Examples ```bash # AMASS → Kimodo NPZ kimodo_convert motion_amass.npz motion_kimodo.npz # Kimodo NPZ → AMASS kimodo_convert motion_kimodo.npz motion_out_amass.npz # Kimodo SOMA NPZ → BVH kimodo_convert motion_kimodo.npz motion.bvh # BVH → Kimodo NPZ kimodo_convert motion.bvh motion_kimodo.npz # G1 CSV → Kimodo NPZ kimodo_convert motion.csv motion_kimodo.npz # Kimodo G1 NPZ → CSV kimodo_convert motion_kimodo.npz motion.csv ``` When both input and output are `.npz`, the tool assumes **AMASS → Kimodo** if the input is AMASS, and **Kimodo → AMASS** if the input is already a Kimodo NPZ. Use `--from` / `--to` if you need to disambiguate. ## Limitations - **BVH import** is intended for BVHs produced by Kimodo (`Root` wrapper + SOMA77 joint names) and is also compatible with the BONES-SEED dataset, which uses the same skeleton hierarchy. Arbitrary BVH files with different joint names or hierarchies may not work. - **G1 CSV** encodes only the degrees of freedom exposed in MuJoCo; the inverse path reconstructs local rotations from those angles (same convention as `to_qpos`). ================================================ FILE: docs/source/user_guide/output_formats.md ================================================ # Output Formats ## Converting Between Formats To convert between the formats described below, see [Motion format conversion](motion_convert.md) (`kimodo_convert`). ## Kimodo NPZ Format Generated motions are stored as NPZ files (one file per sample, e.g. 
`motion_00.npz`) containing:

- `posed_joints`: Global joint positions `[T, J, 3]`
- `global_rot_mats`: Global joint rotation matrices `[T, J, 3, 3]`
- `local_rot_mats`: Local (parent-relative) joint rotation matrices `[T, J, 3, 3]`
- `foot_contacts`: Foot contact labels [left heel, left toe, right heel, right toe] `[T, 4]`
- `smooth_root_pos`: Smoothed root representation output by the model `[T, 3]`
- `root_positions`: The (non-smoothed) trajectory of the actual root joint (e.g., pelvis) `[T, 3]`
- `global_root_heading`: The heading direction output from the model `[T, 2]`

Where:

- `T`: number of frames
- `J`: number of joints in the exported skeleton representation (`77` for SOMA NPZ exports, `34` for G1, `22` for SMPL-X)

If multiple samples are generated, files are saved with suffixes like `_00`, `_01`, etc.

For SOMA models, the exported NPZ uses the full **`somaskel77`** skeleton even though the model itself operates internally on the reduced **`somaskel30`** skeleton. This means the saved `posed_joints`, `global_rot_mats`, and `local_rot_mats` arrays are written in the 77-joint SOMA layout. Older 30-joint SOMA NPZ files may still exist and remain loadable for backward compatibility.

Also for SOMA models, the output motion is saved such that the rest pose (i.e. zero pose) is the standard T-pose that Kimodo uses internally. This differs from the default behavior of BVH export (see below), which uses a rest pose consistent with the BONES-SEED dataset format. The standard T-pose as a BVH file is also available [in the assets of the repo](https://github.com/nv-tlabs/kimodo/tree/main/kimodo/assets/skeletons/somaskel77).

## BVH Format for Kimodo-SOMA

When using a SOMA model and passing the `--bvh` flag to CLI generation, Kimodo also writes a BVH file alongside the NPZ output.

- BVH export is supported for **SOMA models only**
- the exported hierarchy uses the full **`somaskel77`** skeleton
- if the motion is still in internal `somaskel30` form, Kimodo converts it to `somaskel77` before writing the BVH
- the file stores root translation plus per-joint local rotations for the clip at the generated frame rate
- by default, the rest pose (i.e., zero pose) of the saved BVH file is consistent with the BONES-SEED dataset format. If you prefer a standard T-pose as the rest pose, pass in `--bvh_standard_tpose` when generating.

The exporter writes a standard plain-text BVH file and scales joint offsets and root motion from meters to centimeters (same format as the SEED dataset release). If multiple samples are generated, files are saved with suffixes like `_00`, `_01`, etc.

## CSV Format for Kimodo-G1

When using `Kimodo-G1` models and providing `--output` to CLI generation, the exporter writes MuJoCo `qpos` data to a CSV file. Each row corresponds to a pose in the motion and contains 36 values:

- Root translation `[x, y, z]`
- Root rotation quaternion `[w, x, y, z]`
- 29 joint 1-DoF values (in G1 joint order)

The CSV uses the MuJoCo coordinate system (z-up, +x forward). If multiple samples are generated, files are saved with suffixes like `_00`, `_01`, etc.

## AMASS NPZ Format for Kimodo-SMPLX

When using the `Kimodo-SMPLX-RP` model and `--output` is specified for CLI generation, the exporter writes an AMASS-style SMPL-X `.npz` file. Keys include:

- `trans`: Root translation `[T, 3]`
- `root_orient`: Root orientation axis-angle `[T, 3]`
- `pose_body`: Body pose axis-angle `[T, 63]` (21 joints x 3)
- `pose_hand`: Hand pose axis-angle `[T, 90]` (15 joints x 2 hands x 3)
- `pose_jaw`: Jaw pose axis-angle `[T, 3]`
- `pose_eye`: Eye pose axis-angle `[T, 6]`
- `betas`: Shape coefficients
- `num_betas`: Number of shape coefficients
- `gender`: `neutral`
- `surface_model_type`: `smplx`
- `mocap_frame_rate`: Frame rate (fps)
- `mocap_time_length`: Motion duration in seconds

The exporter converts from the Kimodo coordinate system (y-up, +z forward) to AMASS coordinates (z-up, +y forward). If multiple samples are generated, files are saved with suffixes like `_00`, `_01`, etc.
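As a quick sanity check on any generated Kimodo NPZ, the keys documented above can be inspected directly with NumPy. A minimal sketch, assuming a SOMA export at Kimodo's 30 Hz generation rate:

```python
import numpy as np

# Load a generated Kimodo NPZ and print the shape of each documented array.
motion = np.load("motion_00.npz")
for key in ("posed_joints", "global_rot_mats", "local_rot_mats",
            "foot_contacts", "smooth_root_pos", "root_positions",
            "global_root_heading"):
    print(key, motion[key].shape)

num_frames, num_joints, _ = motion["posed_joints"].shape  # J == 77 for SOMA exports
duration_s = num_frames / 30.0  # Kimodo generates at 30 Hz
print(f"{num_frames} frames ({duration_s:.2f} s), {num_joints} joints")
```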
================================================
FILE: docs/source/user_guide/seed_dataset.md
================================================
# Loading BONES-SEED BVH data

The [BONES-SEED dataset](https://huggingface.co/datasets/bones-studio/seed) is a publicly available optical motion-capture dataset distributed as BVH files with the [SOMA 77-joint skeleton](../key_concepts/skeleton.md). This page walks through the steps to parse a SEED BVH file and convert it into Kimodo's internal motion representation. This is similar to the pipeline used by the benchmark to extract ground-truth motions from SEED data (see the [benchmark pipeline](../benchmark/pipeline.md)).

## Step-by-Step Conversion

### 1. Parse the BVH file

`parse_bvh_motion` reads a BVH file and returns local joint rotation matrices, root translation (in meters), and the source frame rate.

```python
from kimodo.skeleton.bvh import parse_bvh_motion

local_rot_mats, root_trans, bvh_fps = parse_bvh_motion(bvh_path)
```

### 2. Subsample to 30 FPS

Kimodo operates at 30 Hz. If the source BVH has a different frame rate (120 FPS for BONES-SEED), subsample by striding:

```python
fps = 30
step = round(bvh_fps / fps)
root_trans = root_trans[::step]
local_rot_mats = local_rot_mats[::step]
```

### 3. Convert to the standard T-pose

The SEED BVH rest pose differs from Kimodo's canonical T-pose. The `to_standard_tpose` function remaps the local rotations accordingly and returns both local and global rotation matrices:

```python
from kimodo.skeleton import SOMASkeleton77

skeleton = SOMASkeleton77()
local_rot_mats, global_rot_mats = skeleton.to_standard_tpose(local_rot_mats)
```

### 4. Compute Kimodo motion features

Build the motion feature tensor used by the model. The feature layout is described in [Motion representation](../key_concepts/motion_representation.md).

```python
from kimodo.motion_rep import KimodoMotionRep

motion_rep = KimodoMotionRep(skeleton, fps)
feats = motion_rep(local_rot_mats, root_trans, to_normalize=False)
```

### 5. Canonicalize (optional) and recover the motion dictionary

Canonicalize so that the motion starts at the origin facing +Z, then invert the features back into a full motion dictionary:

```python
can_feats = motion_rep.canonicalize(feats)
motion_dict = motion_rep.inverse(can_feats, is_normalized=False)
```

`motion_dict` is a dictionary with keys such as `local_rot_mats`, `global_rot_mats`, `posed_joints`, `root_positions`, `smooth_root_pos`, `foot_contacts`, etc. See [Output formats](output_formats.md) for details on the Kimodo NPZ layout.

## Full script

```python
from kimodo.motion_rep import KimodoMotionRep
from kimodo.skeleton import SOMASkeleton77
from kimodo.skeleton.bvh import parse_bvh_motion

# 1. Parse BVH
local_rot_mats, root_trans, bvh_fps = parse_bvh_motion(bvh_path)

# 2.
Subsample to 30 fps fps = 30 step = round(bvh_fps / fps) root_trans = root_trans[::step] local_rot_mats = local_rot_mats[::step] # 3. Convert to standard T-pose skeleton = SOMASkeleton77() local_rot_mats, global_rot_mats = skeleton.to_standard_tpose(local_rot_mats) # 4. Compute motion features motion_rep = KimodoMotionRep(skeleton, fps) feats = motion_rep(local_rot_mats, root_trans, to_normalize=False) # 5. Canonicalize and get the full motion dictionary can_feats = motion_rep.canonicalize(feats) motion_dict = motion_rep.inverse(can_feats, is_normalized=False) ``` ================================================ FILE: kimodo/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Kimodo: text-driven and constrained motion generation model.""" from .model.load_model import AVAILABLE_MODELS, DEFAULT_MODEL, load_model __all__ = [ "AVAILABLE_MODELS", "DEFAULT_MODEL", "load_model", ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/01_single_text_prompt/meta.json ================================================ { "text": "A person walking forward quickly stumbles but maintains their balance", "duration": 5.0, "num_samples": 1, "seed": 43, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/02_multi_text_ee_constraint/constraints.json ================================================ [ { "type": "left-hand", "frame_indices": [ 40, 155 ], "local_joints_rot": [ [ [ -0.20672118663787842, 0.0004979433142580092, 0.010066316463053226 ], [ 0.0789145976305008, 0.0008333905134350061, -5.267082087812014e-05 ], [ -0.1686924546957016, -0.0027884345036000013, 0.0520743690431118 ], [ 0.000989485066384077, 0.1385614573955536, 0.0005803265958093107 ], [ 1.0274103879928589, -0.0004089517460670322, 0.0007986496202647686 ], [ -0.39034226536750793, -0.001306047779507935, -4.922552761854604e-05 ], [ 0.0023066187277436256, -0.0007853881106711924, -0.0062883589416742325 ], [ 4.49517356173601e-05, 0.0033443598076701164, -0.0014551420463249087 ], [ 0.07268467545509338, -0.0011258760932832956, -3.953919076593593e-05 ], [ -0.1719113141298294, 0.018712127581238747, 0.06082615628838539 ], [ 0.0011432868195697665, 0.02744375728070736, 0.0025501118507236242 ], [ 0.41685307025909424, -0.002692570211365819, -0.0006283970433287323 ], [ -0.1283608227968216, 0.0030534265097230673, 0.00016949126438703388 ], [ -0.005590266548097134, 0.0014076301595196128, -0.038615260273218155 ], [ -0.00013014793512411416, 0.001360177993774414, 6.41088408883661e-05 ], [ 0.00010043015936389565, -0.01370090153068304, -0.00014910128084011376 ], [ 0.00023336269077844918, 0.0025421029422432184, 0.04833226650953293 ], [ 0.056574925780296326, 0.0006874562823213637, 0.0004548647266346961 ], [ -0.37481847405433655, -0.054357241839170456, 0.2803272306919098 ], [ 0.0013725318713113666, 0.009074348025023937, -0.0021504403557628393 ], [ -0.0012184121878817677, -0.4267229437828064, 0.011203057132661343 ], [ 1.255251407623291, 0.0009449978824704885, 0.0010158077348023653 ], [ -0.003570390399545431, -0.003947308287024498, -0.5030224323272705 ], [ 0.1881941556930542, -0.000495748536195606, 0.0016725400928407907 ], [ -0.002223622752353549, 0.11821465194225311, 0.007546884939074516 ], [ -0.00137770373839885, -0.0031452146358788013, 
-0.0015015294775366783 ], [ -0.3751647472381592, 0.05314668267965317, -0.28086331486701965 ], [ -0.007756246719509363, -0.016310883685946465, -0.02847120724618435 ], [ -0.0002517815155442804, 0.427451491355896, 3.640262002591044e-05 ], [ 1.2455408573150635, -0.0014789876295253634, 0.0008519256953150034 ], [ 0.004311776254326105, 0.009671058505773544, 0.5968337655067444 ], [ 0.1335560381412506, 0.0011528844479471445, -0.0008361327927559614 ], [ 0.001167859067209065, -0.1551152616739273, 0.00019725598394870758 ], [ -0.0014258474111557007, 0.0034801543224602938, 0.0009809854673221707 ] ], [ [ -0.047659896314144135, -0.11130385845899582, -0.0020901868119835854 ], [ -1.5705475807189941, -0.0014125468442216516, -0.0008221857133321464 ], [ -0.16147980093955994, 0.014729475602507591, 0.4458121657371521 ], [ -0.00045561062870547175, -0.1160486489534378, -0.006125911604613066 ], [ 2.811251401901245, 0.0016747766640037298, -0.005349006038159132 ], [ -0.8591147065162659, 0.0037903853226453066, 0.00048354381578974426 ], [ 0.006445891689509153, -0.0036706889513880014, -0.03472399711608887 ], [ -0.001481462037190795, 0.0015367366140708327, -0.0015593112912029028 ], [ -1.5751848220825195, 0.001112997648306191, 0.0009848373010754585 ], [ -0.16862420737743378, -0.016877643764019012, -0.26229384541511536 ], [ -9.055795817403123e-05, 0.09453120082616806, -0.0134742371737957 ], [ 2.811314344406128, 0.003919574897736311, 0.005575981922447681 ], [ -0.8299098014831543, -0.003791244002059102, 0.0012802339624613523 ], [ 0.005852710455656052, 0.005849692039191723, 0.1632416546344757 ], [ -0.0015579514438286424, 9.288851288147271e-05, 0.001196552417241037 ], [ 0.00043879495933651924, 0.04429133981466293, 0.0002551022043917328 ], [ -0.0019886596128344536, 0.008745947852730751, -0.00962099153548479 ], [ 0.5197923183441162, -0.0010678194230422378, 0.0002590256044641137 ], [ -0.9051622152328491, -0.12138096988201141, 0.25749173760414124 ], [ 0.010689850896596909, -0.01072163600474596, 0.20382197201251984 ], [ -0.0009684870601631701, -0.5894762873649597, 0.0032688004430383444 ], [ 1.30536949634552, -0.002206705743446946, -0.0020471925381571054 ], [ 0.0067055909894406796, -0.015674468129873276, -0.9086763262748718 ], [ -0.26612186431884766, -0.00016191616305150092, 0.002851327648386359 ], [ 0.003539646975696087, 0.20451955497264862, -0.02575569413602352 ], [ 0.003367731347680092, 0.0018452388467267156, -0.00026573429931886494 ], [ -0.9464634656906128, 0.12737642228603363, -0.2577688992023468 ], [ 0.00046661958913318813, -0.008693858049809933, -0.19606870412826538 ], [ -0.0058177076280117035, 0.6349377036094666, -0.0003108184027951211 ], [ 1.4694209098815918, 0.0046353572979569435, 0.002392316237092018 ], [ 0.022281549870967865, 0.006433307193219662, 1.1441218852996826 ], [ -0.16217999160289764, -0.0005673008854500949, -0.0028868752997368574 ], [ 0.0011142585426568985, 0.036793302744627, 0.06873425096273422 ], [ 0.001964340452104807, -0.004202086944133043, 0.0034294212237000465 ] ] ], "root_positions": [ [ 0.014979152008891106, 0.7896444201469421, 0.8725281357765198 ], [ 0.12546521425247192, 0.30551770329475403, 2.3331315517425537 ] ], "smooth_root_2d": [ [ 0.014979152008891106, 0.8725281357765198 ], [ 0.12546521425247192, 2.3331315517425537 ] ] }, { "type": "right-hand", "frame_indices": [ 40, 155 ], "local_joints_rot": [ [ [ -0.20672118663787842, 0.0004979433142580092, 0.010066316463053226 ], [ 0.0789145976305008, 0.0008333905134350061, -5.267082087812014e-05 ], [ -0.1686924546957016, -0.0027884345036000013, 
0.0520743690431118 ], [ 0.000989485066384077, 0.1385614573955536, 0.0005803265958093107 ], [ 1.0274103879928589, -0.0004089517460670322, 0.0007986496202647686 ], [ -0.39034226536750793, -0.001306047779507935, -4.922552761854604e-05 ], [ 0.0023066187277436256, -0.0007853881106711924, -0.0062883589416742325 ], [ 4.49517356173601e-05, 0.0033443598076701164, -0.0014551420463249087 ], [ 0.07268467545509338, -0.0011258760932832956, -3.953919076593593e-05 ], [ -0.1719113141298294, 0.018712127581238747, 0.06082615628838539 ], [ 0.0011432868195697665, 0.02744375728070736, 0.0025501118507236242 ], [ 0.41685307025909424, -0.002692570211365819, -0.0006283970433287323 ], [ -0.1283608227968216, 0.0030534265097230673, 0.00016949126438703388 ], [ -0.005590266548097134, 0.0014076301595196128, -0.038615260273218155 ], [ -0.00013014793512411416, 0.001360177993774414, 6.41088408883661e-05 ], [ 0.00010043015936389565, -0.01370090153068304, -0.00014910128084011376 ], [ 0.00023336269077844918, 0.0025421029422432184, 0.04833226650953293 ], [ 0.056574925780296326, 0.0006874562823213637, 0.0004548647266346961 ], [ -0.37481847405433655, -0.054357241839170456, 0.2803272306919098 ], [ 0.0013725318713113666, 0.009074348025023937, -0.0021504403557628393 ], [ -0.0012184121878817677, -0.4267229437828064, 0.011203057132661343 ], [ 1.255251407623291, 0.0009449978824704885, 0.0010158077348023653 ], [ -0.003570390399545431, -0.003947308287024498, -0.5030224323272705 ], [ 0.1881941556930542, -0.000495748536195606, 0.0016725400928407907 ], [ -0.002223622752353549, 0.11821465194225311, 0.007546884939074516 ], [ -0.00137770373839885, -0.0031452146358788013, -0.0015015294775366783 ], [ -0.3751647472381592, 0.05314668267965317, -0.28086331486701965 ], [ -0.007756246719509363, -0.016310883685946465, -0.02847120724618435 ], [ -0.0002517815155442804, 0.427451491355896, 3.640262002591044e-05 ], [ 1.2455408573150635, -0.0014789876295253634, 0.0008519256953150034 ], [ 0.004311776254326105, 0.009671058505773544, 0.5968337655067444 ], [ 0.1335560381412506, 0.0011528844479471445, -0.0008361327927559614 ], [ 0.001167859067209065, -0.1551152616739273, 0.00019725598394870758 ], [ -0.0014258474111557007, 0.0034801543224602938, 0.0009809854673221707 ] ], [ [ -0.047659896314144135, -0.11130385845899582, -0.0020901868119835854 ], [ -1.5705475807189941, -0.0014125468442216516, -0.0008221857133321464 ], [ -0.16147980093955994, 0.014729475602507591, 0.4458121657371521 ], [ -0.00045561062870547175, -0.1160486489534378, -0.006125911604613066 ], [ 2.811251401901245, 0.0016747766640037298, -0.005349006038159132 ], [ -0.8591147065162659, 0.0037903853226453066, 0.00048354381578974426 ], [ 0.006445891689509153, -0.0036706889513880014, -0.03472399711608887 ], [ -0.001481462037190795, 0.0015367366140708327, -0.0015593112912029028 ], [ -1.5751848220825195, 0.001112997648306191, 0.0009848373010754585 ], [ -0.16862420737743378, -0.016877643764019012, -0.26229384541511536 ], [ -9.055795817403123e-05, 0.09453120082616806, -0.0134742371737957 ], [ 2.811314344406128, 0.003919574897736311, 0.005575981922447681 ], [ -0.8299098014831543, -0.003791244002059102, 0.0012802339624613523 ], [ 0.005852710455656052, 0.005849692039191723, 0.1632416546344757 ], [ -0.0015579514438286424, 9.288851288147271e-05, 0.001196552417241037 ], [ 0.00043879495933651924, 0.04429133981466293, 0.0002551022043917328 ], [ -0.0019886596128344536, 0.008745947852730751, -0.00962099153548479 ], [ 0.5197923183441162, -0.0010678194230422378, 0.0002590256044641137 ], [ -0.9051622152328491, 
-0.12138096988201141, 0.25749173760414124 ], [ 0.010689850896596909, -0.01072163600474596, 0.20382197201251984 ], [ -0.0009684870601631701, -0.5894762873649597, 0.0032688004430383444 ], [ 1.30536949634552, -0.002206705743446946, -0.0020471925381571054 ], [ 0.0067055909894406796, -0.015674468129873276, -0.9086763262748718 ], [ -0.26612186431884766, -0.00016191616305150092, 0.002851327648386359 ], [ 0.003539646975696087, 0.20451955497264862, -0.02575569413602352 ], [ 0.003367731347680092, 0.0018452388467267156, -0.00026573429931886494 ], [ -0.9464634656906128, 0.12737642228603363, -0.2577688992023468 ], [ 0.00046661958913318813, -0.008693858049809933, -0.19606870412826538 ], [ -0.0058177076280117035, 0.6349377036094666, -0.0003108184027951211 ], [ 1.4694209098815918, 0.0046353572979569435, 0.002392316237092018 ], [ 0.022281549870967865, 0.006433307193219662, 1.1441218852996826 ], [ -0.16217999160289764, -0.0005673008854500949, -0.0028868752997368574 ], [ 0.0011142585426568985, 0.036793302744627, 0.06873425096273422 ], [ 0.001964340452104807, -0.004202086944133043, 0.0034294212237000465 ] ] ], "root_positions": [ [ 0.014979152008891106, 0.7896444201469421, 0.8725281357765198 ], [ 0.12546521425247192, 0.30551770329475403, 2.3331315517425537 ] ], "smooth_root_2d": [ [ 0.014979152008891106, 0.8725281357765198 ], [ 0.12546521425247192, 2.3331315517425537 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/02_multi_text_ee_constraint/meta.json ================================================ { "texts": [ "A person walks forward while carrying a box", "A person sets a box down onto the ground" ], "durations": [ 3.533333333333333, 4.066666666666666 ], "num_samples": 1, "seed": 60, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 1.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/03_full_body_keyframes/constraints.json ================================================ [ { "type": "fullbody", "frame_indices": [ 59, 106, 148 ], "local_joints_rot": [ [ [ 0.42420727014541626, 0.058721136301755905, -0.1945635825395584 ], [ -0.5268475413322449, -0.0005157420528121293, 0.0004701620200648904 ], [ -0.17267920076847076, 0.027239520102739334, 0.36560261249542236 ], [ 0.004160718061029911, -0.22976335883140564, 0.010524176992475986 ], [ 1.5825881958007812, -0.01814083196222782, -0.00019598894868977368 ], [ -0.8827329277992249, 0.009902671910822392, -0.00021610780095215887 ], [ 0.0067768096923828125, -0.013547217473387718, -0.16673408448696136 ], [ 0.0006806282908655703, 0.004601094871759415, -0.0043960982002317905 ], [ -1.4894901514053345, -0.003371267579495907, -0.001970127457752824 ], [ -0.17904962599277496, 0.004051337484270334, 0.19225701689720154 ], [ -0.0033012183848768473, -0.29656991362571716, 0.004984850063920021 ], [ 1.5931552648544312, -0.007282367907464504, -0.0052862209267914295 ], [ -0.35364261269569397, 0.0049067274667322636, 0.0010333984391763806 ], [ 0.0023804877419024706, -0.005421861540526152, -0.19129839539527893 ], [ 0.0008946731686592102, 0.0049979668110609055, -0.0008540445705875754 ], [ -0.00037546976818703115, -0.09826900064945221, 0.0006841858848929405 ], [ 0.004415650386363268, 0.0112489964812994, 0.025344429537653923 ], [ 0.5182019472122192, 0.002875699894502759, 0.002064053900539875 ], [ -0.7899102568626404, -0.11301380395889282, 0.261331170797348 ], [ -0.004763631150126457, 0.003188431030139327, 0.191846564412117 ], [ 
-0.0006821855786256492, -0.24938665330410004, 0.0013275814708322287 ], [ 1.1367335319519043, 0.0038948820438236, 0.0009569167159497738 ], [ 0.006261332891881466, 0.020894864574074745, -1.050469160079956 ], [ 0.06118401885032654, 0.0005131644429638982, 0.00042430072790011764 ], [ 0.0017778673209249973, 0.08777552843093872, -0.044312309473752975 ], [ -0.0006084830965846777, 0.0022449076641350985, -0.001873409142717719 ], [ 0.33878403902053833, -0.04740850627422333, -0.2796333432197571 ], [ 0.02221747301518917, 0.013649695552885532, -0.11847231537103653 ], [ 0.007714178413152695, 0.6182990074157715, 0.009067214094102383 ], [ 0.8923805952072144, -0.00016622581460978836, 0.0021162345074117184 ], [ 0.0038995807990431786, -0.006832453887909651, 0.3025287687778473 ], [ 0.03307999297976494, 0.0005516205565072596, 0.0009820020059123635 ], [ 0.0015379488468170166, -0.08221427351236343, -0.014401843771338463 ], [ -0.00022057670867070556, 0.002010792726650834, 0.0012923656031489372 ] ], [ [ -0.08197958767414093, 0.10326994955539703, -0.1510602980852127 ], [ 0.28157129883766174, 0.0011461800895631313, 0.000703590689226985 ], [ -0.182321235537529, 0.05269569158554077, 0.2730983793735504 ], [ -0.0003947282093577087, 0.09641454368829727, 0.0040251282043755054 ], [ 1.089223861694336, -0.00700604822486639, -0.002539312234148383 ], [ -0.09248486906290054, 0.003849609522148967, 0.0016473153373226523 ], [ -0.010541710071265697, 0.004344945307821035, 0.07663393765687943 ], [ -0.00044715296826325357, -0.004340745974332094, 0.007171581499278545 ], [ -0.3379390239715576, 0.0015806800220161676, -0.0003471111413091421 ], [ -0.1781967729330063, 0.016616491600871086, 0.1652776598930359 ], [ -0.002019439358264208, -0.11581386625766754, 0.0009603232610970736 ], [ 0.6794841289520264, -5.403390241554007e-05, -0.0012657493352890015 ], [ -0.09013757854700089, 0.0018549489323049784, -0.000238976048422046 ], [ -0.0009166855015791953, -0.0007138565997593105, -0.0742788091301918 ], [ -0.0009655999601818621, 0.0029521933756768703, -0.00039851426845416427 ], [ -0.0006129079265519977, -0.19495022296905518, -0.0019512351136654615 ], [ 0.0019297772087156773, -0.0025066917296499014, 0.1518552601337433 ], [ 0.18073193728923798, -0.0008597049745731056, 0.00023304206843022257 ], [ -0.19048453867435455, -0.02173178642988205, 0.2785468101501465 ], [ 0.0032724339980632067, 0.001481848070397973, 0.00837984960526228 ], [ 0.0037242062389850616, -0.19455766677856445, 0.009616612456738949 ], [ -0.19767794013023376, 0.004192049615085125, 0.004219892434775829 ], [ -0.018522148951888084, 0.01758752018213272, -1.4997444152832031 ], [ -0.07066819816827774, -0.0006776255904696882, 0.00122307357378304 ], [ 0.007704276591539383, 0.14503517746925354, 0.0951184555888176 ], [ 0.004533262457698584, -0.0066575342789292336, -0.010643035173416138 ], [ 0.3773331642150879, -0.05414784327149391, -0.2780730128288269 ], [ 0.003753547091037035, 0.002539943205192685, 0.12321871519088745 ], [ -0.004724413156509399, 0.46992960572242737, 0.001832474721595645 ], [ 1.2976007461547852, 0.0007234009681269526, -0.001626322278752923 ], [ -0.0016050372505560517, -0.00880438182502985, 0.17947044968605042 ], [ 0.05334911122918129, -0.00018671243742574006, 0.0010833276901394129 ], [ -0.0015367609448730946, -0.05425700917840004, 0.01668459363281727 ], [ -0.00021225935779511929, 0.001713683595880866, 0.0009809889597818255 ] ], [ [ -0.21817633509635925, -0.012708673253655434, -0.029821090400218964 ], [ 0.3743710219860077, 0.0007941523799672723, 0.00032366320374421775 ], [ 
-0.16750676929950714, 0.003018906805664301, 0.07928019016981125 ], [ -0.0003895726113114506, 0.030501781031489372, 0.0013912678696215153 ], [ 0.2578306794166565, -0.0026517061050981283, -0.0001139347514254041 ], [ -0.227533221244812, 0.0004564583650790155, -0.0004175934591330588 ], [ -0.0015815469669178128, 0.0026496825739741325, -0.017801448702812195 ], [ 0.00024288007989525795, 0.000392801477573812, -2.9845070457668044e-05 ], [ 0.31938642263412476, -0.0006790655897930264, -0.0004276619874872267 ], [ -0.17199693620204926, 0.007707139942795038, 0.014987054280936718 ], [ 0.0012992072151973844, 0.003620905103161931, -0.001210421440191567 ], [ 0.22853288054466248, -0.0018797506345435977, -0.0002660619793459773 ], [ -0.1335543692111969, 0.0010313205420970917, 0.0001083972238120623 ], [ 0.003520265920087695, 0.0035283963661640882, 0.016698163002729416 ], [ 0.0001443400833522901, -0.001745356246829033, -2.3336755475611426e-05 ], [ 0.0003554633294697851, -0.05629483610391617, -0.0006463310564868152 ], [ -0.00298635708168149, 0.0020182463340461254, -0.03614736720919609 ], [ 0.21955031156539917, 0.0005465149879455566, 0.00011243963672313839 ], [ -0.0715053528547287, -0.010282701812684536, 0.28057143092155457 ], [ 0.0007245761225931346, 0.0019379559671506286, -0.018530432134866714 ], [ -0.0020012110471725464, -0.5585712194442749, 0.0002525273594073951 ], [ 1.1451164484024048, 0.000756395107600838, -0.00042264885269105434 ], [ -0.004087591078132391, -0.0022635578643530607, -0.1811828911304474 ], [ 0.15393203496932983, -0.00010327681229682639, 0.000951180059928447 ], [ -0.0005707733216695487, 0.07005079090595245, -0.0018504050094634295 ], [ -0.0013123765820637345, -0.0004375300486572087, 0.0002970081695821136 ], [ -0.09115279465913773, 0.013008617796003819, -0.2808595299720764 ], [ 0.0015214721206575632, -0.007811791729182005, 0.031220799311995506 ], [ -0.00048553026863373816, 0.5777612328529358, 0.0003351669874973595 ], [ 1.0913182497024536, 0.0011191898956894875, -0.0027903772424906492 ], [ 0.000775794149376452, 0.00010774911061162129, 0.10287072509527206 ], [ 0.0997936949133873, 0.0003015398688148707, -0.0006937433499842882 ], [ 0.0003619014751166105, -0.18787385523319244, -0.0010270585771650076 ], [ -0.001584835583344102, 0.0037561857607215643, -0.002414965769276023 ] ] ], "root_positions": [ [ -0.17535515129566193, 0.5689253807067871, 0.9417929649353027 ], [ -0.16934014856815338, 0.7382326722145081, 2.169330596923828 ], [ -0.1823902279138565, 0.7819305658340454, 2.954490900039673 ] ], "smooth_root_2d": [ [ -0.17535515129566193, 0.9417929649353027 ], [ -0.16934014856815338, 2.169330596923828 ], [ -0.1823902279138565, 2.954490900039673 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/03_full_body_keyframes/meta.json ================================================ { "text": "A person walking forward picks up something off the ground", "duration": 5.0, "num_samples": 1, "seed": 51, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 1.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/04_ee_constraint/constraints.json ================================================ [ { "type": "right-hand", "frame_indices": [ 129, 93, 0 ], "local_joints_rot": [ [ [ -0.11223886162042618, 0.20531758666038513, 0.13568778336048126 ], [ 0.1075688898563385, 0.0032202948350459337, 0.0006892754463478923 ], [ -0.17058254778385162, -0.011657492257654667, -0.23103317618370056 ], [ 
-0.02866872400045395, 0.4262913167476654, -0.010209682397544384 ], [ 0.2924644649028778, 0.007188746705651283, 0.0005000674282200634 ], [ -0.13080132007598877, -0.0029640060383826494, -0.0007075564353726804 ], [ -0.005761375650763512, -0.002191383158788085, 0.15397773683071136 ], [ -0.00023041102394927293, -0.0010889451950788498, 0.0007837787852622569 ], [ -0.3537895977497101, -0.0006238390924409032, -0.0010272490326315165 ], [ -0.16032733023166656, -0.02506295032799244, -0.22620464861392975 ], [ 0.0138308797031641, 0.21655774116516113, 0.0317748561501503 ], [ 1.5745534896850586, 0.003732866607606411, 0.0021063678432255983 ], [ -0.17066748440265656, -0.002285068854689598, -0.0029538189992308617 ], [ 0.02313886024057865, -0.07020875811576843, -0.05658446252346039 ], [ 2.5580025976523757e-05, 0.004435115493834019, -0.006514436099678278 ], [ 0.0015886364271864295, -0.292732834815979, -0.0014166575856506824 ], [ -0.008558829315006733, 0.0066470191814005375, -0.010221566073596478 ], [ 0.10141321271657944, -0.0028386565390974283, -0.0006978976307436824 ], [ 0.002506372518837452, 0.001101600006222725, 0.2779805362224579 ], [ 0.020367039367556572, -0.028616085648536682, 0.0971180647611618 ], [ -0.011572631075978279, -0.5930124521255493, -0.026975814253091812 ], [ 0.9286840558052063, -0.00046807233593426645, -0.00013331411173567176 ], [ -0.031172338873147964, -0.04484722763299942, 0.03643424063920975 ], [ 0.03150894120335579, -0.00101278827060014, 0.0015338404336944222 ], [ 0.0005915925721637905, 0.0930531769990921, -0.028835415840148926 ], [ -0.001440802589058876, 0.0010614224011078477, 0.0006542576011270285 ], [ -0.4149414598941803, 0.06656259298324585, -0.2730332314968109 ], [ -0.006371266208589077, -0.02150307223200798, -1.3590242862701416 ], [ 0.00956002902239561, -0.17155548930168152, 0.026624836027622223 ], [ 0.8084958791732788, -0.003991501871496439, 0.0007233448559418321 ], [ -0.020737944170832634, -0.011397535912692547, 0.14019189774990082 ], [ -0.18261606991291046, 0.005134414881467819, -0.001045998651534319 ], [ -0.028427572920918465, -0.6557883620262146, 0.038063470274209976 ], [ 0.005555277690291405, 0.012246276251971722, 0.004770371131598949 ] ], [ [ -0.06392758339643478, 0.3478183448314667, 0.1171446293592453 ], [ 0.12243298441171646, 0.003146131755784154, 0.00017438907525502145 ], [ -0.17841783165931702, -0.0256511103361845, -0.2805330455303192 ], [ -0.022625330835580826, 0.348234087228775, -0.009928824380040169 ], [ 0.28284141421318054, 0.009495020844042301, 0.0010556986089795828 ], [ -0.17478667199611664, -0.004891794174909592, -0.0013969563879072666 ], [ -0.002641322324052453, -0.005833400413393974, 0.20226475596427917 ], [ -0.0009078677394427359, -0.002073301700875163, 0.0012749496381729841 ], [ -0.48070675134658813, 0.0005347213009372354, -0.0004243548901285976 ], [ -0.16694584488868713, -0.03390314802527428, -0.09055406600236893 ], [ 0.009182179346680641, 0.1743844896554947, 0.01932411640882492 ], [ 1.6481772661209106, 0.0002097517135553062, 0.0010239556431770325 ], [ -0.17133140563964844, 0.0028362423181533813, -0.004689408931881189 ], [ 0.025385459885001183, -0.06771048158407211, -0.011561849154531956 ], [ -0.00012663791130762547, 0.001872184220701456, -0.002834505634382367 ], [ 0.001523697399534285, -0.48211750388145447, -0.0005278618773445487 ], [ -0.00822246354073286, -0.00923906546086073, -0.01643195189535618 ], [ 0.04035002365708351, -0.004922393709421158, -0.0005214703269302845 ], [ -0.02120170183479786, -0.000465662480564788, 0.27964550256729126 ], [ 
0.042349521070718765, -0.043123405426740646, 0.21025802195072174 ], [ -0.01620035618543625, -0.5838293433189392, -0.03403719887137413 ], [ 1.1832103729248047, 0.0004754749243147671, -0.0014872060855850577 ], [ -0.040768858045339584, -0.04618615657091141, 0.04847611486911774 ], [ 0.04482508823275566, -0.0005392982857301831, 0.00035259113064967096 ], [ 0.00015537742001470178, -0.024237608537077904, -0.003044326091185212 ], [ -0.0012453795643523335, 0.004743263591080904, 0.004625802394002676 ], [ -0.14595142006874084, 0.0308919008821249, -0.2779163420200348 ], [ -0.03314027562737465, -0.07205720245838165, -1.3401029109954834 ], [ 0.02448190003633499, -0.468079537153244, 0.018310735002160072 ], [ 0.9222347140312195, -0.00624655419960618, -0.0003706512216012925 ], [ 0.0311859343200922, -0.01980999857187271, -0.4311404228210449 ], [ -0.05887744575738907, 0.009565972723066807, 0.0008855919586494565 ], [ -0.0638674795627594, -1.1882448196411133, -0.07744041085243225 ], [ 0.002320833969861269, 0.014880148693919182, 0.00827236007899046 ] ], [ [ 0.028708748519420624, 0.023731501772999763, -0.05906220152974129 ], [ 0.36697518825531006, 0.0020822372753173113, 9.442192094866186e-06 ], [ -0.17328320443630219, -0.029694421216845512, -0.2592017650604248 ], [ -0.027558816596865654, 0.44522055983543396, 0.00263651879504323 ], [ 0.45747342705726624, 0.006375299766659737, 0.000838644162286073 ], [ -0.29932498931884766, -0.0034287264570593834, -0.005712746176868677 ], [ 0.010242770425975323, 0.0686849057674408, 0.12300582230091095 ], [ 0.0019906593952327967, -0.006487288512289524, 0.004740884527564049 ], [ -0.417245090007782, 0.002172173699364066, -0.000527464144397527 ], [ -0.16229933500289917, -0.015825729817152023, 0.26093363761901855 ], [ -0.01547759398818016, -0.4560239017009735, -0.001296655391342938 ], [ 0.520811140537262, -0.016100304201245308, -0.0033653294667601585 ], [ -0.061035193502902985, 0.013747301883995533, 0.0011975782690569758 ], [ 0.002211581217125058, 0.013100380077958107, -0.41168421506881714 ], [ 0.000723487522918731, -0.0009448538185097277, -0.0023157261312007904 ], [ -0.0008414603653363883, -0.22904154658317566, 0.0037871438544243574 ], [ -0.004434449132531881, -0.0019493037834763527, 0.04349867254495621 ], [ 0.11113234609365463, -0.001496539101935923, -6.745033260813216e-06 ], [ 0.03568394109606743, 0.00850191805511713, 0.2815527021884918 ], [ 0.007574420887976885, -0.005988026969134808, -0.04585442319512367 ], [ -0.014899174682796001, -0.6360949277877808, 0.014495083130896091 ], [ 1.1318601369857788, -0.0009174949955195189, -0.008180576376616955 ], [ -0.038145799189805984, -0.05923198536038399, -0.04122990742325783 ], [ 0.07719366252422333, -0.0010574767366051674, 0.0009220906649716198 ], [ -0.0010063578374683857, 0.12876589596271515, -0.021289559081196785 ], [ -0.0008511252817697823, -0.0003541657351888716, -0.0006832815706729889 ], [ 0.10374817997217178, -0.014772959984838963, -0.28235113620758057 ], [ 0.029763568192720413, 0.00017807059339247644, 0.007343007251620293 ], [ -0.0064206854440271854, 0.3665950298309326, -0.0003897137939929962 ], [ 1.0820642709732056, -0.0005379249923862517, -0.0039028781466186047 ], [ -0.004170380067080259, 0.06480656564235687, -0.10721305757761002 ], [ -0.06350508332252502, 0.0011865347623825073, -0.0005369586870074272 ], [ -0.0021817537490278482, -0.08756759762763977, -0.008148521184921265 ], [ 0.00243115471675992, -0.003949992824345827, 0.005949904676526785 ] ] ], "root_positions": [ [ 2.639763593673706, 0.767427384853363, 2.341259479522705 ], [ 
1.9461809396743774, 0.7706995010375977, 1.7243560552597046 ], [ 0.003315839683637023, 0.7415399551391602, -0.0012030001962557435 ] ], "smooth_root_2d": [ [ 2.639763593673706, 2.341259479522705 ], [ 1.9461809396743774, 1.7243560552597046 ], [ 0.003315839683637023, -0.0012030001962557435 ] ] }, { "type": "left-foot", "frame_indices": [ 93, 0 ], "local_joints_rot": [ [ [ -0.06392758339643478, 0.3478183448314667, 0.1171446293592453 ], [ 0.12243298441171646, 0.003146131755784154, 0.00017438907525502145 ], [ -0.17841783165931702, -0.0256511103361845, -0.2805330455303192 ], [ -0.022625330835580826, 0.348234087228775, -0.009928824380040169 ], [ 0.28284141421318054, 0.009495020844042301, 0.0010556986089795828 ], [ -0.17478667199611664, -0.004891794174909592, -0.0013969563879072666 ], [ -0.002641322324052453, -0.005833400413393974, 0.20226475596427917 ], [ -0.0009078677394427359, -0.002073301700875163, 0.0012749496381729841 ], [ -0.48070675134658813, 0.0005347213009372354, -0.0004243548901285976 ], [ -0.16694584488868713, -0.03390314802527428, -0.09055406600236893 ], [ 0.009182179346680641, 0.1743844896554947, 0.01932411640882492 ], [ 1.6481772661209106, 0.0002097517135553062, 0.0010239556431770325 ], [ -0.17133140563964844, 0.0028362423181533813, -0.004689408931881189 ], [ 0.025385459885001183, -0.06771048158407211, -0.011561849154531956 ], [ -0.00012663791130762547, 0.001872184220701456, -0.002834505634382367 ], [ 0.001523697399534285, -0.48211750388145447, -0.0005278618773445487 ], [ -0.00822246354073286, -0.00923906546086073, -0.01643195189535618 ], [ 0.04035002365708351, -0.004922393709421158, -0.0005214703269302845 ], [ -0.02120170183479786, -0.000465662480564788, 0.27964550256729126 ], [ 0.042349521070718765, -0.043123405426740646, 0.21025802195072174 ], [ -0.01620035618543625, -0.5838293433189392, -0.03403719887137413 ], [ 1.1832103729248047, 0.0004754749243147671, -0.0014872060855850577 ], [ -0.040768858045339584, -0.04618615657091141, 0.04847611486911774 ], [ 0.04482508823275566, -0.0005392982857301831, 0.00035259113064967096 ], [ 0.00015537742001470178, -0.024237608537077904, -0.003044326091185212 ], [ -0.0012453795643523335, 0.004743263591080904, 0.004625802394002676 ], [ -0.14595142006874084, 0.0308919008821249, -0.2779163420200348 ], [ -0.03314027562737465, -0.07205720245838165, -1.3401029109954834 ], [ 0.02448190003633499, -0.468079537153244, 0.018310735002160072 ], [ 0.9222347140312195, -0.00624655419960618, -0.0003706512216012925 ], [ 0.0311859343200922, -0.01980999857187271, -0.4311404228210449 ], [ -0.05887744575738907, 0.009565972723066807, 0.0008855919586494565 ], [ -0.0638674795627594, -1.1882448196411133, -0.07744041085243225 ], [ 0.002320833969861269, 0.014880148693919182, 0.00827236007899046 ] ], [ [ 0.028708748519420624, 0.023731501772999763, -0.05906220152974129 ], [ 0.36697518825531006, 0.0020822372753173113, 9.442192094866186e-06 ], [ -0.17328320443630219, -0.029694421216845512, -0.2592017650604248 ], [ -0.027558816596865654, 0.44522055983543396, 0.00263651879504323 ], [ 0.45747342705726624, 0.006375299766659737, 0.000838644162286073 ], [ -0.29932498931884766, -0.0034287264570593834, -0.005712746176868677 ], [ 0.010242770425975323, 0.0686849057674408, 0.12300582230091095 ], [ 0.0019906593952327967, -0.006487288512289524, 0.004740884527564049 ], [ -0.417245090007782, 0.002172173699364066, -0.000527464144397527 ], [ -0.16229933500289917, -0.015825729817152023, 0.26093363761901855 ], [ -0.01547759398818016, -0.4560239017009735, -0.001296655391342938 ], [ 0.520811140537262, 
-0.016100304201245308, -0.0033653294667601585 ], [ -0.061035193502902985, 0.013747301883995533, 0.0011975782690569758 ], [ 0.002211581217125058, 0.013100380077958107, -0.41168421506881714 ], [ 0.000723487522918731, -0.0009448538185097277, -0.0023157261312007904 ], [ -0.0008414603653363883, -0.22904154658317566, 0.0037871438544243574 ], [ -0.004434449132531881, -0.0019493037834763527, 0.04349867254495621 ], [ 0.11113234609365463, -0.001496539101935923, -6.745033260813216e-06 ], [ 0.03568394109606743, 0.00850191805511713, 0.2815527021884918 ], [ 0.007574420887976885, -0.005988026969134808, -0.04585442319512367 ], [ -0.014899174682796001, -0.6360949277877808, 0.014495083130896091 ], [ 1.1318601369857788, -0.0009174949955195189, -0.008180576376616955 ], [ -0.038145799189805984, -0.05923198536038399, -0.04122990742325783 ], [ 0.07719366252422333, -0.0010574767366051674, 0.0009220906649716198 ], [ -0.0010063578374683857, 0.12876589596271515, -0.021289559081196785 ], [ -0.0008511252817697823, -0.0003541657351888716, -0.0006832815706729889 ], [ 0.10374817997217178, -0.014772959984838963, -0.28235113620758057 ], [ 0.029763568192720413, 0.00017807059339247644, 0.007343007251620293 ], [ -0.0064206854440271854, 0.3665950298309326, -0.0003897137939929962 ], [ 1.0820642709732056, -0.0005379249923862517, -0.0039028781466186047 ], [ -0.004170380067080259, 0.06480656564235687, -0.10721305757761002 ], [ -0.06350508332252502, 0.0011865347623825073, -0.0005369586870074272 ], [ -0.0021817537490278482, -0.08756759762763977, -0.008148521184921265 ], [ 0.00243115471675992, -0.003949992824345827, 0.005949904676526785 ] ] ], "root_positions": [ [ 1.9461809396743774, 0.7706995010375977, 1.7243560552597046 ], [ 0.003315839683637023, 0.7415399551391602, -0.0012030001962557435 ] ], "smooth_root_2d": [ [ 1.9461809396743774, 1.7243560552597046 ], [ 0.003315839683637023, -0.0012030001962557435 ] ] }, { "type": "right-foot", "frame_indices": [ 0 ], "local_joints_rot": [ [ [ 0.028708748519420624, 0.023731501772999763, -0.05906220152974129 ], [ 0.36697518825531006, 0.0020822372753173113, 9.442192094866186e-06 ], [ -0.17328320443630219, -0.029694421216845512, -0.2592017650604248 ], [ -0.027558816596865654, 0.44522055983543396, 0.00263651879504323 ], [ 0.45747342705726624, 0.006375299766659737, 0.000838644162286073 ], [ -0.29932498931884766, -0.0034287264570593834, -0.005712746176868677 ], [ 0.010242770425975323, 0.0686849057674408, 0.12300582230091095 ], [ 0.0019906593952327967, -0.006487288512289524, 0.004740884527564049 ], [ -0.417245090007782, 0.002172173699364066, -0.000527464144397527 ], [ -0.16229933500289917, -0.015825729817152023, 0.26093363761901855 ], [ -0.01547759398818016, -0.4560239017009735, -0.001296655391342938 ], [ 0.520811140537262, -0.016100304201245308, -0.0033653294667601585 ], [ -0.061035193502902985, 0.013747301883995533, 0.0011975782690569758 ], [ 0.002211581217125058, 0.013100380077958107, -0.41168421506881714 ], [ 0.000723487522918731, -0.0009448538185097277, -0.0023157261312007904 ], [ -0.0008414603653363883, -0.22904154658317566, 0.0037871438544243574 ], [ -0.004434449132531881, -0.0019493037834763527, 0.04349867254495621 ], [ 0.11113234609365463, -0.001496539101935923, -6.745033260813216e-06 ], [ 0.03568394109606743, 0.00850191805511713, 0.2815527021884918 ], [ 0.007574420887976885, -0.005988026969134808, -0.04585442319512367 ], [ -0.014899174682796001, -0.6360949277877808, 0.014495083130896091 ], [ 1.1318601369857788, -0.0009174949955195189, -0.008180576376616955 ], [ -0.038145799189805984, 
-0.05923198536038399, -0.04122990742325783 ], [ 0.07719366252422333, -0.0010574767366051674, 0.0009220906649716198 ], [ -0.0010063578374683857, 0.12876589596271515, -0.021289559081196785 ], [ -0.0008511252817697823, -0.0003541657351888716, -0.0006832815706729889 ], [ 0.10374817997217178, -0.014772959984838963, -0.28235113620758057 ], [ 0.029763568192720413, 0.00017807059339247644, 0.007343007251620293 ], [ -0.0064206854440271854, 0.3665950298309326, -0.0003897137939929962 ], [ 1.0820642709732056, -0.0005379249923862517, -0.0039028781466186047 ], [ -0.004170380067080259, 0.06480656564235687, -0.10721305757761002 ], [ -0.06350508332252502, 0.0011865347623825073, -0.0005369586870074272 ], [ -0.0021817537490278482, -0.08756759762763977, -0.008148521184921265 ], [ 0.00243115471675992, -0.003949992824345827, 0.005949904676526785 ] ] ], "root_positions": [ [ 0.003315839683637023, 0.7415399551391602, -0.0012030001962557435 ] ], "smooth_root_2d": [ [ 0.003315839683637023, -0.0012030001962557435 ] ] }, { "type": "left-hand", "frame_indices": [ 0 ], "local_joints_rot": [ [ [ 0.028708748519420624, 0.023731501772999763, -0.05906220152974129 ], [ 0.36697518825531006, 0.0020822372753173113, 9.442192094866186e-06 ], [ -0.17328320443630219, -0.029694421216845512, -0.2592017650604248 ], [ -0.027558816596865654, 0.44522055983543396, 0.00263651879504323 ], [ 0.45747342705726624, 0.006375299766659737, 0.000838644162286073 ], [ -0.29932498931884766, -0.0034287264570593834, -0.005712746176868677 ], [ 0.010242770425975323, 0.0686849057674408, 0.12300582230091095 ], [ 0.0019906593952327967, -0.006487288512289524, 0.004740884527564049 ], [ -0.417245090007782, 0.002172173699364066, -0.000527464144397527 ], [ -0.16229933500289917, -0.015825729817152023, 0.26093363761901855 ], [ -0.01547759398818016, -0.4560239017009735, -0.001296655391342938 ], [ 0.520811140537262, -0.016100304201245308, -0.0033653294667601585 ], [ -0.061035193502902985, 0.013747301883995533, 0.0011975782690569758 ], [ 0.002211581217125058, 0.013100380077958107, -0.41168421506881714 ], [ 0.000723487522918731, -0.0009448538185097277, -0.0023157261312007904 ], [ -0.0008414603653363883, -0.22904154658317566, 0.0037871438544243574 ], [ -0.004434449132531881, -0.0019493037834763527, 0.04349867254495621 ], [ 0.11113234609365463, -0.001496539101935923, -6.745033260813216e-06 ], [ 0.03568394109606743, 0.00850191805511713, 0.2815527021884918 ], [ 0.007574420887976885, -0.005988026969134808, -0.04585442319512367 ], [ -0.014899174682796001, -0.6360949277877808, 0.014495083130896091 ], [ 1.1318601369857788, -0.0009174949955195189, -0.008180576376616955 ], [ -0.038145799189805984, -0.05923198536038399, -0.04122990742325783 ], [ 0.07719366252422333, -0.0010574767366051674, 0.0009220906649716198 ], [ -0.0010063578374683857, 0.12876589596271515, -0.021289559081196785 ], [ -0.0008511252817697823, -0.0003541657351888716, -0.0006832815706729889 ], [ 0.10374817997217178, -0.014772959984838963, -0.28235113620758057 ], [ 0.029763568192720413, 0.00017807059339247644, 0.007343007251620293 ], [ -0.0064206854440271854, 0.3665950298309326, -0.0003897137939929962 ], [ 1.0820642709732056, -0.0005379249923862517, -0.0039028781466186047 ], [ -0.004170380067080259, 0.06480656564235687, -0.10721305757761002 ], [ -0.06350508332252502, 0.0011865347623825073, -0.0005369586870074272 ], [ -0.0021817537490278482, -0.08756759762763977, -0.008148521184921265 ], [ 0.00243115471675992, -0.003949992824345827, 0.005949904676526785 ] ] ], "root_positions": [ [ 0.003315839683637023, 
0.7415399551391602, -0.0012030001962557435 ] ], "smooth_root_2d": [ [ 0.003315839683637023, -0.0012030001962557435 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/04_ee_constraint/meta.json ================================================ { "text": "A person walks diagonally to the left and waves at someone on their right", "duration": 4.966666666666667, "num_samples": 1, "seed": 44, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/05_root_path/constraints.json ================================================ [ { "type": "root2d", "frame_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180 ], "smooth_root_2d": [ [ -0.024789854884147644, 0.01764228567481041 ], [ -0.019911596551537514, 0.03666473180055618 ], [ -0.015032900497317314, 0.05568705126643181 ], [ -0.010153300128877163, 0.07470902800559998 ], [ -0.005272198934108019, 0.09373034536838531 ], [ -0.00038888092967681587, 0.11275061219930649 ], [ 0.004497467540204525, 0.1317693293094635 ], [ 0.009387745521962643, 0.15078598260879517 ], [ 0.014282921329140663, 0.16979998350143433 ], [ 0.019184017553925514, 0.18881070613861084 ], [ 0.024092093110084534, 0.20781749486923218 ], [ 0.029008235782384872, 0.226819708943367 ], [ 0.033933546394109726, 0.24581670761108398 ], [ 0.038869116455316544, 0.2648078203201294 ], [ 0.04381602630019188, 0.2837924659252167 ], [ 0.048775337636470795, 0.30277004837989807 ], [ 0.05374806746840477, 0.321740061044693 ], [ 0.058735184371471405, 0.3407020568847656 ], [ 0.06373759359121323, 0.35965561866760254 ], [ 0.06875615566968918, 0.37860047817230225 ], [ 0.07379162311553955, 0.3975364565849304 ], [ 0.07884468138217926, 0.4164634943008423 ], [ 0.08391592651605606, 0.43538162112236023 ], [ 0.08900584280490875, 0.45429113507270813 ], [ 0.09411482512950897, 0.47319236397743225 ], [ 0.0992431491613388, 0.49208587408065796 ], [ 0.10439097136259079, 0.5109724998474121 ], [ 0.1095583438873291, 0.5298531651496887 ], [ 0.11474518477916718, 0.5487290620803833 ], [ 0.11995130032300949, 0.5676016807556152 ], [ 0.12517637014389038, 0.5864726901054382 ], [ 0.13041996955871582, 0.6053440570831299 ], [ 0.13568153977394104, 0.6242179274559021 ], [ 0.1409604400396347, 0.6430967450141907 ], [ 0.14625589549541473, 0.6619831919670105 ], [ 0.15156707167625427, 0.6808802485466003 ], [ 0.15689301490783691, 0.6997910141944885 ], [ 0.16223272681236267, 0.7187188267707825 ], [ 0.16759774088859558, 0.7376715540885925 ], [ 0.17303690314292908, 0.7566697001457214 ], [ 0.17862369120121002, 0.7757418751716614 ], [ 0.1844315379858017, 0.7949170470237732 ], [ 0.19053390622138977, 0.8142240643501282 ], [ 
0.19700415432453156, 0.8336920142173767 ], [ 0.20391567051410675, 0.8533498644828796 ], [ 0.21134179830551147, 0.8732268214225769 ], [ 0.21935580670833588, 0.8933521509170532 ], [ 0.22803090512752533, 0.9137551784515381 ], [ 0.23744019865989685, 0.9344654083251953 ], [ 0.24765664339065552, 0.9555124640464783 ], [ 0.2587530016899109, 0.9769262671470642 ], [ 0.2708017826080322, 0.9987370371818542 ], [ 0.2838752567768097, 1.0209753513336182 ], [ 0.29804527759552, 1.0436722040176392 ], [ 0.3133833110332489, 1.0668591260910034 ], [ 0.32996034622192383, 1.0905684232711792 ], [ 0.3478468656539917, 1.1148326396942139 ], [ 0.36711281538009644, 1.1396855115890503 ], [ 0.3878275454044342, 1.1651611328125 ], [ 0.41000601649284363, 1.1912426948547363 ], [ 0.4336090087890625, 1.2178623676300049 ], [ 0.45859649777412415, 1.24495267868042 ], [ 0.4849279224872589, 1.272446632385254 ], [ 0.5125620365142822, 1.300277590751648 ], [ 0.5414570569992065, 1.3283785581588745 ], [ 0.571570634841919, 1.3566826581954956 ], [ 0.6028600931167603, 1.3851218223571777 ], [ 0.6352822780609131, 1.4136276245117188 ], [ 0.6687941551208496, 1.4421300888061523 ], [ 0.7033523917198181, 1.4705579280853271 ], [ 0.7389140725135803, 1.4988375902175903 ], [ 0.7754364013671875, 1.5268937349319458 ], [ 0.8128772974014282, 1.554648518562317 ], [ 0.8511953353881836, 1.5820214748382568 ], [ 0.8903500437736511, 1.6089295148849487 ], [ 0.930302083492279, 1.6352869272232056 ], [ 0.9710133075714111, 1.6610050201416016 ], [ 1.0124471187591553, 1.685992956161499 ], [ 1.0545682907104492, 1.7101572751998901 ], [ 1.0973432064056396, 1.7334026098251343 ], [ 1.1407400369644165, 1.755631923675537 ], [ 1.1847283840179443, 1.7767466306686401 ], [ 1.229279637336731, 1.7966474294662476 ], [ 1.2743664979934692, 1.8152343034744263 ], [ 1.3199630975723267, 1.8324071168899536 ], [ 1.3660447597503662, 1.848065733909607 ], [ 1.4125876426696777, 1.8621103763580322 ], [ 1.4595685005187988, 1.8744415044784546 ], [ 1.5069485902786255, 1.8850340843200684 ], [ 1.5546728372573853, 1.8939374685287476 ], [ 1.6026861667633057, 1.9012004137039185 ], [ 1.650932788848877, 1.9068700075149536 ], [ 1.6993565559387207, 1.9109913110733032 ], [ 1.7479000091552734, 1.9136062860488892 ], [ 1.7965046167373657, 1.9147534370422363 ], [ 1.8451100587844849, 1.9144660234451294 ], [ 1.893654465675354, 1.9127724170684814 ], [ 1.942073941230774, 1.9096946716308594 ], [ 1.990302324295044, 1.9052486419677734 ], [ 2.03827166557312, 1.8994430303573608 ], [ 2.0859110355377197, 1.8922799825668335 ], [ 2.133148193359375, 1.8837546110153198 ], [ 2.179908037185669, 1.8738549947738647 ], [ 2.2261133193969727, 1.862563133239746 ], [ 2.27168607711792, 1.8498553037643433 ], [ 2.316545248031616, 1.8357020616531372 ], [ 2.360609769821167, 1.8200697898864746 ], [ 2.403796911239624, 1.8029208183288574 ], [ 2.44602370262146, 1.7842146158218384 ], [ 2.4872069358825684, 1.7639081478118896 ], [ 2.5272626876831055, 1.7419570684432983 ], [ 2.566108465194702, 1.7183157205581665 ], [ 2.603734254837036, 1.693010687828064 ], [ 2.640204906463623, 1.6661417484283447 ], [ 2.6755847930908203, 1.6378077268600464 ], [ 2.7099392414093018, 1.6081076860427856 ], [ 2.743333101272583, 1.5771397352218628 ], [ 2.7758309841156006, 1.5450016260147095 ], [ 2.80749773979187, 1.5117899179458618 ], [ 2.8383967876434326, 1.477600336074829 ], [ 2.868591785430908, 1.4425268173217773 ], [ 2.8981447219848633, 1.4066622257232666 ], [ 2.9271178245544434, 1.3700973987579346 ], [ 2.9555718898773193, 1.3329222202301025 ], [ 2.983566999435425, 
1.2952247858047485 ], [ 3.011162757873535, 1.2570923566818237 ], [ 3.038418769836426, 1.2186110019683838 ], [ 3.0653929710388184, 1.1798664331436157 ], [ 3.092144250869751, 1.1409443616867065 ], [ 3.118730306625366, 1.1019304990768433 ], [ 3.1451311111450195, 1.062860131263733 ], [ 3.171248197555542, 1.0237183570861816 ], [ 3.1969823837280273, 0.9844915866851807 ], [ 3.222233295440674, 0.945167064666748 ], [ 3.246898889541626, 0.905733585357666 ], [ 3.270875930786133, 0.8661811947822571 ], [ 3.294057846069336, 0.826501190662384 ], [ 3.3163373470306396, 0.7866860032081604 ], [ 3.3376033306121826, 0.7467291951179504 ], [ 3.357743263244629, 0.7066251039505005 ], [ 3.3766419887542725, 0.6663689613342285 ], [ 3.394181966781616, 0.6259563565254211 ], [ 3.4102442264556885, 0.5853835344314575 ], [ 3.424708127975464, 0.5446467995643616 ], [ 3.4374516010284424, 0.5037427544593811 ], [ 3.448352098464966, 0.46266797184944153 ], [ 3.457287073135376, 0.42141908407211304 ], [ 3.4641330242156982, 0.3799927234649658 ], [ 3.468876838684082, 0.33839157223701477 ], [ 3.471616506576538, 0.2966245114803314 ], [ 3.4724483489990234, 0.2547004222869873 ], [ 3.4714694023132324, 0.21262840926647186 ], [ 3.4687745571136475, 0.17041781544685364 ], [ 3.4644577503204346, 0.1280783712863922 ], [ 3.4586100578308105, 0.0856202244758606 ], [ 3.4513206481933594, 0.043054141104221344 ], [ 3.442674398422241, 0.0003915314737241715 ], [ 3.432753562927246, -0.04235544055700302 ], [ 3.421635389328003, -0.08517380803823471 ], [ 3.409393072128296, -0.12804976105690002 ], [ 3.3960955142974854, -0.17096871137619019 ], [ 3.3818066120147705, -0.21391519904136658 ], [ 3.366586685180664, -0.25687310099601746 ], [ 3.3504908084869385, -0.29982560873031616 ], [ 3.333570718765259, -0.34275543689727783 ], [ 3.315875291824341, -0.3856448531150818 ], [ 3.297449827194214, -0.42847591638565063 ], [ 3.278337240219116, -0.47123050689697266 ], [ 3.2585792541503906, -0.5138905048370361 ], [ 3.238215923309326, -0.5564379692077637 ], [ 3.217292308807373, -0.5988707542419434 ], [ 3.1958582401275635, -0.6412028074264526 ], [ 3.1739635467529297, -0.6834480166435242 ], [ 3.1516590118408203, -0.7256200909614563 ], [ 3.1289961338043213, -0.7677323818206787 ], [ 3.1060280799865723, -0.8097975850105286 ], [ 3.082807779312134, -0.8518276214599609 ], [ 3.0593905448913574, -0.8938331604003906 ], [ 3.0358314514160156, -0.9358235001564026 ], [ 3.0062689781188965, -0.9883013367652893 ], [ 2.9885144233703613, -1.0197867155075073 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/05_root_path/meta.json ================================================ { "text": "Initially standing still and calm, the person then starts jogging in a counterclockwise arc.", "duration": 6.033333333333333, "num_samples": 1, "seed": 62, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/06_root_waypoints/constraints.json ================================================ [ { "type": "root2d", "frame_indices": [ 0, 87, 169, 240 ], "smooth_root_2d": [ [ 0.037946805357933044, -0.036908961832523346 ], [ 2.2506563663482666, 0.06945009529590607 ], [ 2.23332142829895, -2.0749685764312744 ], [ 4.0815324783325195, -2.273184061050415 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/06_root_waypoints/meta.json 
================================================ { "text": "A person is walking while carrying a small object in their left hand", "duration": 8.033333333333333, "num_samples": 1, "seed": 61, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/07_text_terrain/meta.json ================================================ { "text": "A person begins walking up the stairs", "duration": 3.5, "num_samples": 1, "seed": 44, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-g1-rp/08_text_object/meta.json ================================================ { "text": "A person picks up an object from low on their left side and places it up high", "duration": 5.033333333333333, "num_samples": 1, "seed": 47, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/01_single_text_prompt/meta.json ================================================ { "text": "A person runs forward and then leaps over an obstacle in front of them.", "duration": 5.0, "num_samples": 1, "seed": 42, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/02_multi_text_prompt/meta.json ================================================ { "texts": [ "A person is walking forward casually.", "A person turns to the right and starts sneakily moving forward" ], "durations": [ 3.533333333333333, 4.033333333333333 ], "num_samples": 1, "seed": 42, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/03_full_body_keyframes/constraints.json ================================================ [ { "type": "fullbody", "frame_indices": [ 79, 134 ], "local_joints_rot": [ [ [ 0.2765098512172699, 0.3728594183921814, -0.3292054831981659 ], [ 0.35604047775268555, 0.018222831189632416, -0.054862238466739655 ], [ 0.12065527588129044, -0.027457308024168015, -0.06907646358013153 ], [ 0.6048485636711121, -0.11472737789154053, -0.19573566317558289 ], [ -0.12398597598075867, 0.03840772435069084, 0.18822282552719116 ], [ -0.06553511321544647, 0.13032270967960358, 0.04257704317569733 ], [ -0.24969959259033203, 0.06990747153759003, 0.13426002860069275 ], [ -0.002762501360848546, 0.0010064352536574006, -0.0012083332985639572 ], [ -0.18770116567611694, -0.06528781354427338, 0.006136383395642042 ], [ -0.18933561444282532, 0.06753389537334442, -0.00862747710198164 ], [ 0.1765439361333847, -0.5079103708267212, 0.11742556095123291 ], [ -0.6833809614181519, -0.36341744661331177, -0.09875624626874924 ], [ -0.004083660896867514, -0.2955799102783203, 0.007416445296257734 ], [ -0.46948903799057007, 0.0019703502766788006, 0.2218078076839447 ], [ 0.15589098632335663, 0.29247695207595825, -0.2839103043079376 ], [ -0.006183772347867489, 0.039787642657756805, -1.0509610176086426 ], [ 0.28110796213150024, -0.01673225313425064, 0.05465283617377281 ], [ 0.4582408368587494, 0.6058111786842346, 1.040449619293213 ], [ -0.016165010631084442, 0.7843144536018372, 0.007565980777144432 ], [ 
-0.21160456538200378, 0.009858175180852413, 0.022257711738348007 ], [ 0.08559019863605499, -0.26941442489624023, 0.28404051065444946 ], [ -0.0722564086318016, -0.055347055196762085, 0.8767912983894348 ], [ -0.9036330580711365, -0.19308030605316162, 0.6912829875946045 ], [ 1.7018375396728516, -0.052370231598615646, 0.0016176343197003007 ], [ -0.6713079810142517, -0.22423480451107025, -0.17199599742889404 ], [ -0.2397085577249527, -0.04111046716570854, 0.02976534143090248 ], [ -1.4084941148757935, -0.42399686574935913, 0.23780424892902374 ], [ 1.488803744316101, -0.006882219575345516, 0.005796314682811499 ], [ -0.34890878200531006, 0.25402817130088806, -0.10165958851575851 ], [ -0.017090337350964546, 0.013983047567307949, -0.02469288557767868 ] ], [ [ -0.10219376534223557, 0.15241079032421112, -0.1140606626868248 ], [ -0.07097288966178894, -0.023205779492855072, 0.014893154613673687 ], [ -0.11436910182237625, -0.07182353734970093, -0.024793410673737526 ], [ 0.32571300864219666, -0.11312247067689896, -0.017911700531840324 ], [ 0.036515623331069946, -0.0007576555362902582, 0.14029929041862488 ], [ -0.06553909182548523, 0.07225329428911209, 0.0065536051988601685 ], [ -0.09946814924478531, 0.02283940091729164, 0.060293473303318024 ], [ -0.0007363191107288003, 0.0019088855478912592, 0.00034123589284718037 ], [ -0.18651022017002106, -0.06423485279083252, 0.0069741918705403805 ], [ -0.18586836755275726, 0.06800899654626846, -0.0060585117898881435 ], [ 0.23363706469535828, -0.20687633752822876, -0.07240967452526093 ], [ -0.3135974407196045, -0.2623864710330963, -1.0657873153686523 ], [ -0.012310811318457127, -1.6650079488754272, -0.010509567335247993 ], [ -0.8171713352203369, -0.2551392912864685, 0.08705981075763702 ], [ 0.13723036646842957, 0.2864063084125519, -0.2900709807872772 ], [ -0.005930017679929733, 0.05293968319892883, -1.0459250211715698 ], [ 0.24218180775642395, 0.02018338069319725, 0.1226770281791687 ], [ 0.3315959572792053, 0.3782292902469635, 1.2296319007873535 ], [ -0.0014527677558362484, 0.3045952022075653, -0.0014049106976017356 ], [ -0.20010970532894135, -0.07485076785087585, 0.0041703470051288605 ], [ 0.08470325917005539, -0.3079097270965576, 0.29375413060188293 ], [ -0.09725581854581833, -0.055068179965019226, 0.8742175698280334 ], [ 0.4040503203868866, -0.016711091622710228, 0.21672509610652924 ], [ 0.5082376599311829, -0.013459251262247562, 0.004872385878115892 ], [ 0.1745426058769226, -0.24501416087150574, -0.003703102469444275 ], [ -0.33402949571609497, -0.035541169345378876, 0.032360970973968506 ], [ -0.37681734561920166, 0.02067263424396515, 0.10783999413251877 ], [ 0.4257254898548126, 0.0016118268249556422, 0.0033562832977622747 ], [ 0.04139057174324989, 0.032555095851421356, 0.04008425772190094 ], [ -0.03090120106935501, 0.01570875011384487, -0.024774780496954918 ] ] ], "root_positions": [ [ -0.18697306513786316, 0.7126776576042175, 1.1559109687805176 ], [ -0.014062155969440937, 0.9611971974372864, 2.898127555847168 ] ], "smooth_root_2d": [ [ -0.18697306513786316, 1.1559109687805176 ], [ -0.014062155969440937, 2.898127555847168 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/03_full_body_keyframes/meta.json ================================================ { "text": "A person walks forward and picks something up from the ground", "duration": 5.0, "num_samples": 1, "seed": 43, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } 
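[Note on the constraint format, not part of the extracted repository: the constraints.json files above all share one layout — a JSON list of entries, each with a "type" ("root2d", "fullbody", or an end-effector name such as "left-foot"), the "frame_indices" being constrained, and per-frame arrays: "local_joints_rot" (one 3-component rotation per joint per frame), "root_positions" (one XYZ root position per frame), and "smooth_root_2d" (one 2D ground-plane point per frame, matching the first and third components of the paired "root_positions" rows). "root2d" entries carry only "smooth_root_2d". A minimal sketch of loading one of these files and checking that shape invariant, using only the Python standard library; summarize_constraints and shape are illustrative helpers, not part of the kimodo API:

    import json
    from pathlib import Path

    def shape(x):
        """Nested-list shape, e.g. [2, 30, 3] for two frames of 30 joint rotations."""
        dims = []
        while isinstance(x, list) and x:
            dims.append(len(x))
            x = x[0]
        return dims

    def summarize_constraints(path: Path) -> None:
        """Print each constraint entry's type, constrained frames, and array shapes."""
        for entry in json.loads(path.read_text()):
            n_frames = len(entry["frame_indices"])
            print(f"type={entry['type']!r}  frame_indices={entry['frame_indices']}")
            # Every per-frame array should have one leading row per constrained frame.
            for key in ("local_joints_rot", "root_positions", "smooth_root_2d"):
                if key in entry:  # "root2d" entries carry only "smooth_root_2d"
                    dims = shape(entry[key])
                    assert dims[0] == n_frames, f"{key}: {dims[0]} rows != {n_frames} frames"
                    print(f"  {key}: shape {dims}")

    summarize_constraints(Path(
        "kimodo/assets/demo/examples/kimodo-soma-rp/03_full_body_keyframes/constraints.json"
    ))

Run against the full-body keyframe example above, this would report two constrained frames (79 and 134) with per-frame shapes [2, 30, 3], [2, 3], and [2, 2]; for root-path examples such as 05_root_path, the same check confirms one 2D waypoint per listed frame index.]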
================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/04_ee_constraint/constraints.json ================================================ [ { "type": "right-foot", "frame_indices": [ 28, 94 ], "local_joints_rot": [ [ [ 0.14788010716438293, -0.010833931155502796, -0.01388303842395544 ], [ -0.03901153802871704, 0.0003969503741245717, -0.00016447225061710924 ], [ -0.09507032483816147, 0.008639314211905003, -0.0073561337776482105 ], [ 0.21237806975841522, -0.02139095962047577, -0.01700877584517002 ], [ -0.20991119742393494, 0.06551700085401535, -0.05272415280342102 ], [ -0.06337061524391174, 0.05204080045223236, 0.014292852953076363 ], [ 0.07047945261001587, 0.08330309391021729, -0.002013514516875148 ], [ -0.0019600456580519676, -0.0013381227618083358, -2.7628393581835553e-06 ], [ -0.18709787726402283, -0.06659803539514542, 0.0078862514346838 ], [ -0.18698126077651978, 0.06395528465509415, -0.008215037174522877 ], [ 0.08230585604906082, -0.38376951217651367, 0.05542140454053879 ], [ -0.7260366082191467, -0.24878422915935516, -0.35609468817710876 ], [ 0.004249485209584236, -0.4476320147514343, -0.018469776958227158 ], [ -0.9212101697921753, -0.1470143049955368, 0.5044775605201721 ], [ 0.14870156347751617, 0.2985619604587555, -0.29298385977745056 ], [ 0.001955621177330613, 0.055549487471580505, -1.0630463361740112 ], [ 0.11859050393104553, 0.46535199880599976, -0.030845582485198975 ], [ -0.7298654317855835, 0.5346517562866211, 0.2791443467140198 ], [ 0.008972911164164543, 0.48752307891845703, 0.01847967691719532 ], [ -0.5805565118789673, -0.08708631247282028, -0.15088550746440887 ], [ 0.08582834899425507, -0.2886488735675812, 0.2854447066783905 ], [ -0.0898093581199646, -0.05874425172805786, 0.8657776117324829 ], [ -0.3135877549648285, 0.07464626431465149, 0.0517989918589592 ], [ 0.29447537660598755, -0.003720453940331936, 0.0011728419922292233 ], [ -0.12890003621578217, 0.0839272066950798, -0.090343177318573 ], [ 0.008360159583389759, -0.03457032889127731, 0.02827553078532219 ], [ -0.3120643198490143, -0.01133657619357109, -0.03218594938516617 ], [ 0.2538771331310272, 0.0018040596041828394, 0.0009352069464512169 ], [ -0.0887608677148819, -0.03465384244918823, 0.07154331356287003 ], [ 0.01681467890739441, 0.01778421923518181, -0.025033073499798775 ] ], [ [ 0.21243979036808014, 1.0922467708587646, -0.05739659443497658 ], [ -0.04288899898529053, 0.019888481125235558, -0.014078406617045403 ], [ -0.09594971686601639, 0.10335114598274231, -0.007776615675538778 ], [ 0.2422163188457489, 0.08445896953344345, -0.05605608597397804 ], [ -0.14986605942249298, 0.10279522091150284, -0.19410337507724762 ], [ -0.07278254628181458, 0.00021229058620519936, -0.0064666238613426685 ], [ -0.18101167678833008, -0.047196485102176666, 0.09371022135019302 ], [ -0.0013136633206158876, -0.0020103836432099342, -0.0002618256548885256 ], [ -0.1867513209581375, -0.0681525468826294, 0.0023792991414666176 ], [ -0.18714284896850586, 0.06443598866462708, -0.003183535533025861 ], [ 0.1040755957365036, -0.1164601668715477, -0.08953910320997238 ], [ -0.7818892598152161, -0.40082883834838867, -0.40901198983192444 ], [ 0.0014971806667745113, -0.7006690502166748, -0.003588718129321933 ], [ -0.7653300762176514, -0.030549153685569763, 0.5779297947883606 ], [ 0.1444747895002365, 0.30648332834243774, -0.2944350242614746 ], [ 0.00627485616132617, 0.05844533443450928, -1.0504485368728638 ], [ 0.16790169477462769, 0.6803913116455078, -0.0802350640296936 ], [ -0.7650246620178223, 0.2571314871311188, 
0.044474273920059204 ], [ 0.00177879654802382, 0.32478848099708557, 0.024663111194968224 ], [ -1.1130585670471191, 0.06198093295097351, -0.1499929279088974 ], [ 0.09419120848178864, -0.28672322630882263, 0.2861841320991516 ], [ -0.08110660314559937, -0.06315471976995468, 0.8641197085380554 ], [ -0.4702282249927521, -0.2976788580417633, -0.08966172486543655 ], [ 0.2188275307416916, -0.010813144035637379, -0.0024994502309709787 ], [ 0.12644176185131073, -0.4933742582798004, -0.23269610106945038 ], [ -0.05216464772820473, -0.03182952478528023, 0.026469329372048378 ], [ -0.21055173873901367, -0.5854666233062744, -0.08316371589899063 ], [ 0.2703852653503418, -0.0070351893082261086, 0.00034556735772639513 ], [ -0.20080512762069702, -0.5529999136924744, 0.08794122189283371 ], [ -0.020619722083210945, 0.01961597241461277, -0.02498687617480755 ] ] ], "root_positions": [ [ 0.006224155426025391, 1.0099574327468872, 0.0004121592501178384 ], [ 0.025673866271972656, 1.0039517879486084, 0.0002174415858462453 ] ], "smooth_root_2d": [ [ 0.006224155426025391, 0.0004121592501178384 ], [ 0.025673866271972656, 0.0002174415858462453 ] ] }, { "type": "left-foot", "frame_indices": [ 28, 94 ], "local_joints_rot": [ [ [ 0.14788010716438293, -0.010833931155502796, -0.01388303842395544 ], [ -0.03901153802871704, 0.0003969503741245717, -0.00016447225061710924 ], [ -0.09507032483816147, 0.008639314211905003, -0.0073561337776482105 ], [ 0.21237806975841522, -0.02139095962047577, -0.01700877584517002 ], [ -0.20991119742393494, 0.06551700085401535, -0.05272415280342102 ], [ -0.06337061524391174, 0.05204080045223236, 0.014292852953076363 ], [ 0.07047945261001587, 0.08330309391021729, -0.002013514516875148 ], [ -0.0019600456580519676, -0.0013381227618083358, -2.7628393581835553e-06 ], [ -0.18709787726402283, -0.06659803539514542, 0.0078862514346838 ], [ -0.18698126077651978, 0.06395528465509415, -0.008215037174522877 ], [ 0.08230585604906082, -0.38376951217651367, 0.05542140454053879 ], [ -0.7260366082191467, -0.24878422915935516, -0.35609468817710876 ], [ 0.004249485209584236, -0.4476320147514343, -0.018469776958227158 ], [ -0.9212101697921753, -0.1470143049955368, 0.5044775605201721 ], [ 0.14870156347751617, 0.2985619604587555, -0.29298385977745056 ], [ 0.001955621177330613, 0.055549487471580505, -1.0630463361740112 ], [ 0.11859050393104553, 0.46535199880599976, -0.030845582485198975 ], [ -0.7298654317855835, 0.5346517562866211, 0.2791443467140198 ], [ 0.008972911164164543, 0.48752307891845703, 0.01847967691719532 ], [ -0.5805565118789673, -0.08708631247282028, -0.15088550746440887 ], [ 0.08582834899425507, -0.2886488735675812, 0.2854447066783905 ], [ -0.0898093581199646, -0.05874425172805786, 0.8657776117324829 ], [ -0.3135877549648285, 0.07464626431465149, 0.0517989918589592 ], [ 0.29447537660598755, -0.003720453940331936, 0.0011728419922292233 ], [ -0.12890003621578217, 0.0839272066950798, -0.090343177318573 ], [ 0.008360159583389759, -0.03457032889127731, 0.02827553078532219 ], [ -0.3120643198490143, -0.01133657619357109, -0.03218594938516617 ], [ 0.2538771331310272, 0.0018040596041828394, 0.0009352069464512169 ], [ -0.0887608677148819, -0.03465384244918823, 0.07154331356287003 ], [ 0.01681467890739441, 0.01778421923518181, -0.025033073499798775 ] ], [ [ 0.21243979036808014, 1.0922467708587646, -0.05739659443497658 ], [ -0.04288899898529053, 0.019888481125235558, -0.014078406617045403 ], [ -0.09594971686601639, 0.10335114598274231, -0.007776615675538778 ], [ 0.2422163188457489, 0.08445896953344345, 
-0.05605608597397804 ], [ -0.14986605942249298, 0.10279522091150284, -0.19410337507724762 ], [ -0.07278254628181458, 0.00021229058620519936, -0.0064666238613426685 ], [ -0.18101167678833008, -0.047196485102176666, 0.09371022135019302 ], [ -0.0013136633206158876, -0.0020103836432099342, -0.0002618256548885256 ], [ -0.1867513209581375, -0.0681525468826294, 0.0023792991414666176 ], [ -0.18714284896850586, 0.06443598866462708, -0.003183535533025861 ], [ 0.1040755957365036, -0.1164601668715477, -0.08953910320997238 ], [ -0.7818892598152161, -0.40082883834838867, -0.40901198983192444 ], [ 0.0014971806667745113, -0.7006690502166748, -0.003588718129321933 ], [ -0.7653300762176514, -0.030549153685569763, 0.5779297947883606 ], [ 0.1444747895002365, 0.30648332834243774, -0.2944350242614746 ], [ 0.00627485616132617, 0.05844533443450928, -1.0504485368728638 ], [ 0.16790169477462769, 0.6803913116455078, -0.0802350640296936 ], [ -0.7650246620178223, 0.2571314871311188, 0.044474273920059204 ], [ 0.00177879654802382, 0.32478848099708557, 0.024663111194968224 ], [ -1.1130585670471191, 0.06198093295097351, -0.1499929279088974 ], [ 0.09419120848178864, -0.28672322630882263, 0.2861841320991516 ], [ -0.08110660314559937, -0.06315471976995468, 0.8641197085380554 ], [ -0.4702282249927521, -0.2976788580417633, -0.08966172486543655 ], [ 0.2188275307416916, -0.010813144035637379, -0.0024994502309709787 ], [ 0.12644176185131073, -0.4933742582798004, -0.23269610106945038 ], [ -0.05216464772820473, -0.03182952478528023, 0.026469329372048378 ], [ -0.21055173873901367, -0.5854666233062744, -0.08316371589899063 ], [ 0.2703852653503418, -0.0070351893082261086, 0.00034556735772639513 ], [ -0.20080512762069702, -0.5529999136924744, 0.08794122189283371 ], [ -0.020619722083210945, 0.01961597241461277, -0.02498687617480755 ] ] ], "root_positions": [ [ 0.006224155426025391, 1.0099574327468872, 0.0004121592501178384 ], [ 0.025673866271972656, 1.0039517879486084, 0.0002174415858462453 ] ], "smooth_root_2d": [ [ 0.006224155426025391, 0.0004121592501178384 ], [ 0.025673866271972656, 0.0002174415858462453 ] ] }, { "type": "left-hand", "frame_indices": [ 28, 94 ], "local_joints_rot": [ [ [ 0.14788010716438293, -0.010833931155502796, -0.01388303842395544 ], [ -0.03901153802871704, 0.0003969503741245717, -0.00016447225061710924 ], [ -0.09507032483816147, 0.008639314211905003, -0.0073561337776482105 ], [ 0.21237806975841522, -0.02139095962047577, -0.01700877584517002 ], [ -0.20991119742393494, 0.06551700085401535, -0.05272415280342102 ], [ -0.06337061524391174, 0.05204080045223236, 0.014292852953076363 ], [ 0.07047945261001587, 0.08330309391021729, -0.002013514516875148 ], [ -0.0019600456580519676, -0.0013381227618083358, -2.7628393581835553e-06 ], [ -0.18709787726402283, -0.06659803539514542, 0.0078862514346838 ], [ -0.18698126077651978, 0.06395528465509415, -0.008215037174522877 ], [ 0.08230585604906082, -0.38376951217651367, 0.05542140454053879 ], [ -0.7260366082191467, -0.24878422915935516, -0.35609468817710876 ], [ 0.004249485209584236, -0.4476320147514343, -0.018469776958227158 ], [ -0.9212101697921753, -0.1470143049955368, 0.5044775605201721 ], [ 0.14870156347751617, 0.2985619604587555, -0.29298385977745056 ], [ 0.001955621177330613, 0.055549487471580505, -1.0630463361740112 ], [ 0.11859050393104553, 0.46535199880599976, -0.030845582485198975 ], [ -0.7298654317855835, 0.5346517562866211, 0.2791443467140198 ], [ 0.008972911164164543, 0.48752307891845703, 0.01847967691719532 ], [ -0.5805565118789673, -0.08708631247282028, 
-0.15088550746440887 ], [ 0.08582834899425507, -0.2886488735675812, 0.2854447066783905 ], [ -0.0898093581199646, -0.05874425172805786, 0.8657776117324829 ], [ -0.3135877549648285, 0.07464626431465149, 0.0517989918589592 ], [ 0.29447537660598755, -0.003720453940331936, 0.0011728419922292233 ], [ -0.12890003621578217, 0.0839272066950798, -0.090343177318573 ], [ 0.008360159583389759, -0.03457032889127731, 0.02827553078532219 ], [ -0.3120643198490143, -0.01133657619357109, -0.03218594938516617 ], [ 0.2538771331310272, 0.0018040596041828394, 0.0009352069464512169 ], [ -0.0887608677148819, -0.03465384244918823, 0.07154331356287003 ], [ 0.01681467890739441, 0.01778421923518181, -0.025033073499798775 ] ], [ [ 0.21243979036808014, 1.0922467708587646, -0.05739659443497658 ], [ -0.04288899898529053, 0.019888481125235558, -0.014078406617045403 ], [ -0.09594971686601639, 0.10335114598274231, -0.007776615675538778 ], [ 0.2422163188457489, 0.08445896953344345, -0.05605608597397804 ], [ -0.14986605942249298, 0.10279522091150284, -0.19410337507724762 ], [ -0.07278254628181458, 0.00021229058620519936, -0.0064666238613426685 ], [ -0.18101167678833008, -0.047196485102176666, 0.09371022135019302 ], [ -0.0013136633206158876, -0.0020103836432099342, -0.0002618256548885256 ], [ -0.1867513209581375, -0.0681525468826294, 0.0023792991414666176 ], [ -0.18714284896850586, 0.06443598866462708, -0.003183535533025861 ], [ 0.1040755957365036, -0.1164601668715477, -0.08953910320997238 ], [ -0.7818892598152161, -0.40082883834838867, -0.40901198983192444 ], [ 0.0014971806667745113, -0.7006690502166748, -0.003588718129321933 ], [ -0.7653300762176514, -0.030549153685569763, 0.5779297947883606 ], [ 0.1444747895002365, 0.30648332834243774, -0.2944350242614746 ], [ 0.00627485616132617, 0.05844533443450928, -1.0504485368728638 ], [ 0.16790169477462769, 0.6803913116455078, -0.0802350640296936 ], [ -0.7650246620178223, 0.2571314871311188, 0.044474273920059204 ], [ 0.00177879654802382, 0.32478848099708557, 0.024663111194968224 ], [ -1.1130585670471191, 0.06198093295097351, -0.1499929279088974 ], [ 0.09419120848178864, -0.28672322630882263, 0.2861841320991516 ], [ -0.08110660314559937, -0.06315471976995468, 0.8641197085380554 ], [ -0.4702282249927521, -0.2976788580417633, -0.08966172486543655 ], [ 0.2188275307416916, -0.010813144035637379, -0.0024994502309709787 ], [ 0.12644176185131073, -0.4933742582798004, -0.23269610106945038 ], [ -0.05216464772820473, -0.03182952478528023, 0.026469329372048378 ], [ -0.21055173873901367, -0.5854666233062744, -0.08316371589899063 ], [ 0.2703852653503418, -0.0070351893082261086, 0.00034556735772639513 ], [ -0.20080512762069702, -0.5529999136924744, 0.08794122189283371 ], [ -0.020619722083210945, 0.01961597241461277, -0.02498687617480755 ] ] ], "root_positions": [ [ 0.006224155426025391, 1.0099574327468872, 0.0004121592501178384 ], [ 0.025673866271972656, 1.0039517879486084, 0.0002174415858462453 ] ], "smooth_root_2d": [ [ 0.006224155426025391, 0.0004121592501178384 ], [ 0.025673866271972656, 0.0002174415858462453 ] ] }, { "type": "right-hand", "frame_indices": [ 28, 94 ], "local_joints_rot": [ [ [ 0.14788010716438293, -0.010833931155502796, -0.01388303842395544 ], [ -0.03901153802871704, 0.0003969503741245717, -0.00016447225061710924 ], [ -0.09507032483816147, 0.008639314211905003, -0.0073561337776482105 ], [ 0.21237806975841522, -0.02139095962047577, -0.01700877584517002 ], [ -0.20991119742393494, 0.06551700085401535, -0.05272415280342102 ], [ -0.06337061524391174, 0.05204080045223236, 
0.014292852953076363 ], [ 0.07047945261001587, 0.08330309391021729, -0.002013514516875148 ], [ -0.0019600456580519676, -0.0013381227618083358, -2.7628393581835553e-06 ], [ -0.18709787726402283, -0.06659803539514542, 0.0078862514346838 ], [ -0.18698126077651978, 0.06395528465509415, -0.008215037174522877 ], [ 0.08230585604906082, -0.38376951217651367, 0.05542140454053879 ], [ -0.7260366082191467, -0.24878422915935516, -0.35609468817710876 ], [ 0.004249485209584236, -0.4476320147514343, -0.018469776958227158 ], [ -0.9212101697921753, -0.1470143049955368, 0.5044775605201721 ], [ 0.14870156347751617, 0.2985619604587555, -0.29298385977745056 ], [ 0.001955621177330613, 0.055549487471580505, -1.0630463361740112 ], [ 0.11859050393104553, 0.46535199880599976, -0.030845582485198975 ], [ -0.7298654317855835, 0.5346517562866211, 0.2791443467140198 ], [ 0.008972911164164543, 0.48752307891845703, 0.01847967691719532 ], [ -0.5805565118789673, -0.08708631247282028, -0.15088550746440887 ], [ 0.08582834899425507, -0.2886488735675812, 0.2854447066783905 ], [ -0.0898093581199646, -0.05874425172805786, 0.8657776117324829 ], [ -0.3135877549648285, 0.07464626431465149, 0.0517989918589592 ], [ 0.29447537660598755, -0.003720453940331936, 0.0011728419922292233 ], [ -0.12890003621578217, 0.0839272066950798, -0.090343177318573 ], [ 0.008360159583389759, -0.03457032889127731, 0.02827553078532219 ], [ -0.3120643198490143, -0.01133657619357109, -0.03218594938516617 ], [ 0.2538771331310272, 0.0018040596041828394, 0.0009352069464512169 ], [ -0.0887608677148819, -0.03465384244918823, 0.07154331356287003 ], [ 0.01681467890739441, 0.01778421923518181, -0.025033073499798775 ] ], [ [ 0.21243979036808014, 1.0922467708587646, -0.05739659443497658 ], [ -0.04288899898529053, 0.019888481125235558, -0.014078406617045403 ], [ -0.09594971686601639, 0.10335114598274231, -0.007776615675538778 ], [ 0.2422163188457489, 0.08445896953344345, -0.05605608597397804 ], [ -0.14986605942249298, 0.10279522091150284, -0.19410337507724762 ], [ -0.07278254628181458, 0.00021229058620519936, -0.0064666238613426685 ], [ -0.18101167678833008, -0.047196485102176666, 0.09371022135019302 ], [ -0.0013136633206158876, -0.0020103836432099342, -0.0002618256548885256 ], [ -0.1867513209581375, -0.0681525468826294, 0.0023792991414666176 ], [ -0.18714284896850586, 0.06443598866462708, -0.003183535533025861 ], [ 0.1040755957365036, -0.1164601668715477, -0.08953910320997238 ], [ -0.7818892598152161, -0.40082883834838867, -0.40901198983192444 ], [ 0.0014971806667745113, -0.7006690502166748, -0.003588718129321933 ], [ -0.7653300762176514, -0.030549153685569763, 0.5779297947883606 ], [ 0.1444747895002365, 0.30648332834243774, -0.2944350242614746 ], [ 0.00627485616132617, 0.05844533443450928, -1.0504485368728638 ], [ 0.16790169477462769, 0.6803913116455078, -0.0802350640296936 ], [ -0.7650246620178223, 0.2571314871311188, 0.044474273920059204 ], [ 0.00177879654802382, 0.32478848099708557, 0.024663111194968224 ], [ -1.1130585670471191, 0.06198093295097351, -0.1499929279088974 ], [ 0.09419120848178864, -0.28672322630882263, 0.2861841320991516 ], [ -0.08110660314559937, -0.06315471976995468, 0.8641197085380554 ], [ -0.4702282249927521, -0.2976788580417633, -0.08966172486543655 ], [ 0.2188275307416916, -0.010813144035637379, -0.0024994502309709787 ], [ 0.12644176185131073, -0.4933742582798004, -0.23269610106945038 ], [ -0.05216464772820473, -0.03182952478528023, 0.026469329372048378 ], [ -0.21055173873901367, -0.5854666233062744, -0.08316371589899063 ], [ 0.2703852653503418, 
-0.0070351893082261086, 0.00034556735772639513 ], [ -0.20080512762069702, -0.5529999136924744, 0.08794122189283371 ], [ -0.020619722083210945, 0.01961597241461277, -0.02498687617480755 ] ] ], "root_positions": [ [ 0.006224155426025391, 1.0099574327468872, 0.0004121592501178384 ], [ 0.025673866271972656, 1.0039517879486084, 0.0002174415858462453 ] ], "smooth_root_2d": [ [ 0.006224155426025391, 0.0004121592501178384 ], [ 0.025673866271972656, 0.0002174415858462453 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/04_ee_constraint/meta.json ================================================ { "text": "A person picks up an object in front of them with two hands and places it to the left side", "duration": 5.033333333333333, "num_samples": 1, "seed": 48, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/05_root_path/constraints.json ================================================ [ { "type": "root2d", "frame_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299 ], "smooth_root_2d": [ [ 0.0720488652586937, 0.005473949480801821 ], [ 0.08682604879140854, 0.03799768537282944 ], [ 0.10160323977470398, 0.07052142173051834 ], [ 0.11638043075799942, 0.10304517298936844 ], [ 0.13115762174129486, 0.13556894659996033 ], [ 0.1459348350763321, 0.1680927276611328 ], [ 0.16071203351020813, 0.20061656832695007 ], [ 0.17548926174640656, 0.23314043879508972 ], [ 0.19026650488376617, 0.26566436886787415 ], [ 0.2050437480211258, 0.29818838834762573 ], [ 0.2198210209608078, 0.3307124972343445 ], [ 0.234598308801651, 0.3632366955280304 ], [ 0.2493756115436554, 0.39576101303100586 ], [ 0.2641529440879822, 0.42828547954559326 ], [ 0.27893027663230896, 0.4608100950717926 ], [ 0.29370763897895813, 0.4933348596096039 ], [ 0.3084850013256073, 0.5258598327636719 ], [ 0.32326239347457886, 0.5583849549293518 ], [ 0.3380397856235504, 0.5909103155136108 ], [ 0.352817177772522, 0.623435914516449 ], [ 0.36759456992149353, 0.6559617519378662 ], [ 0.3823719322681427, 0.6884878277778625 ], [ 0.39714932441711426, 
0.721014142036438 ], [ 0.41192665696144104, 0.7535408139228821 ], [ 0.4267039895057678, 0.7860677242279053 ], [ 0.4414812922477722, 0.8185949325561523 ], [ 0.4562585949897766, 0.8511224389076233 ], [ 0.47103583812713623, 0.8836503028869629 ], [ 0.48581308126449585, 0.9161785244941711 ], [ 0.5005902647972107, 0.948707103729248 ], [ 0.5153675079345703, 0.9812359809875488 ], [ 0.5301446914672852, 1.0137652158737183 ], [ 0.5449219346046448, 1.046294927597046 ], [ 0.5596991777420044, 1.0788248777389526 ], [ 0.5744765400886536, 1.1113553047180176 ], [ 0.5892539024353027, 1.1438862085342407 ], [ 0.6040313243865967, 1.1764174699783325 ], [ 0.6188088655471802, 1.208949089050293 ], [ 0.6335865259170532, 1.2414813041687012 ], [ 0.648364245891571, 1.274013876914978 ], [ 0.6631421446800232, 1.3065470457077026 ], [ 0.6779201030731201, 1.3390806913375854 ], [ 0.6926981806755066, 1.371614933013916 ], [ 0.7074640989303589, 1.4041519165039062 ], [ 0.7221670746803284, 1.4367012977600098 ], [ 0.7367299199104309, 1.4692773818969727 ], [ 0.7510751485824585, 1.5018945932388306 ], [ 0.7651242613792419, 1.5345673561096191 ], [ 0.7787973880767822, 1.5673108100891113 ], [ 0.7920125126838684, 1.6001399755477905 ], [ 0.8046852350234985, 1.6330705881118774 ], [ 0.8167278170585632, 1.66611909866333 ], [ 0.8280492424964905, 1.6993021965026855 ], [ 0.8385547399520874, 1.7326377630233765 ], [ 0.8481456637382507, 1.766144037246704 ], [ 0.856719434261322, 1.7998400926589966 ], [ 0.8641700744628906, 1.8337457180023193 ], [ 0.8703880906105042, 1.8678812980651855 ], [ 0.875261127948761, 1.9022676944732666 ], [ 0.8786745071411133, 1.9369266033172607 ], [ 0.8805115222930908, 1.971879482269287 ], [ 0.8806543946266174, 2.0071487426757812 ], [ 0.8789843320846558, 2.0427565574645996 ], [ 0.8753821849822998, 2.0787250995635986 ], [ 0.869838297367096, 2.1150567531585693 ], [ 0.8624524474143982, 2.1517333984375 ], [ 0.8533244729042053, 2.1887366771698 ], [ 0.8425538539886475, 2.226048469543457 ], [ 0.8302397131919861, 2.263650894165039 ], [ 0.816480278968811, 2.301525831222534 ], [ 0.8013728260993958, 2.3396553993225098 ], [ 0.7850133180618286, 2.3780221939086914 ], [ 0.7674961686134338, 2.4166083335876465 ], [ 0.7489144802093506, 2.4553961753845215 ], [ 0.7293595671653748, 2.494368553161621 ], [ 0.7089214324951172, 2.533508062362671 ], [ 0.6876888871192932, 2.5727970600128174 ], [ 0.665749728679657, 2.6122183799743652 ], [ 0.6431912779808044, 2.651754379272461 ], [ 0.6200692653656006, 2.691394805908203 ], [ 0.5964087247848511, 2.731137275695801 ], [ 0.5722349882125854, 2.770979166030884 ], [ 0.5475742816925049, 2.810917615890503 ], [ 0.5224538445472717, 2.8509483337402344 ], [ 0.49690231680870056, 2.8910679817199707 ], [ 0.47094982862472534, 2.93127179145813 ], [ 0.44462811946868896, 2.971554756164551 ], [ 0.4179706573486328, 3.011911630630493 ], [ 0.3910125195980072, 3.0523364543914795 ], [ 0.3637904226779938, 3.0928235054016113 ], [ 0.336342453956604, 3.133366107940674 ], [ 0.3087080717086792, 3.173957586288452 ], [ 0.2809275984764099, 3.2145910263061523 ], [ 0.25304216146469116, 3.2552595138549805 ], [ 0.2250932902097702, 3.2959556579589844 ], [ 0.19712261855602264, 3.336672067642212 ], [ 0.16917157173156738, 3.3774020671844482 ], [ 0.14128103852272034, 3.418138027191162 ], [ 0.11349108070135117, 3.4588732719421387 ], [ 0.08584070205688477, 3.499600648880005 ], [ 0.05836760997772217, 3.540313243865967 ], [ 0.031108075752854347, 3.5810046195983887 ], [ 0.004096813499927521, 3.6216683387756348 ], [ -0.022633060812950134, 
3.6622982025146484 ], [ -0.049050018191337585, 3.702888250350952 ], [ -0.07512406259775162, 3.7434325218200684 ], [ -0.10082659870386124, 3.7839250564575195 ], [ -0.12613031268119812, 3.8243606090545654 ], [ -0.1510089486837387, 3.8647332191467285 ], [ -0.17543718218803406, 3.9050378799438477 ], [ -0.19939035177230835, 3.9452688694000244 ], [ -0.22284428775310516, 3.9854207038879395 ], [ -0.24577516317367554, 4.025487899780273 ], [ -0.26815930008888245, 4.065464496612549 ], [ -0.28985288739204407, 4.1053338050842285 ], [ -0.3105919361114502, 4.145066261291504 ], [ -0.33011239767074585, 4.184632301330566 ], [ -0.34815022349357605, 4.224003314971924 ], [ -0.3644413650035858, 4.263148784637451 ], [ -0.3787217438220978, 4.302039623260498 ], [ -0.3907274007797241, 4.340645790100098 ], [ -0.4001944959163666, 4.378937721252441 ], [ -0.40685927867889404, 4.416884899139404 ], [ -0.41045811772346497, 4.4544572830200195 ], [ -0.41072750091552734, 4.491624355316162 ], [ -0.40740400552749634, 4.528356552124023 ], [ -0.4004855453968048, 4.564655303955078 ], [ -0.3902314007282257, 4.600553512573242 ], [ -0.37690070271492004, 4.636085033416748 ], [ -0.3607523441314697, 4.67128324508667 ], [ -0.3420449197292328, 4.706181049346924 ], [ -0.32103657722473145, 4.740812301635742 ], [ -0.2979850471019745, 4.775211334228516 ], [ -0.2731475234031677, 4.809412002563477 ], [ -0.24678070843219757, 4.843447685241699 ], [ -0.21914079785346985, 4.877353668212891 ], [ -0.19048355519771576, 4.911164283752441 ], [ -0.16106447577476501, 4.944913864135742 ], [ -0.13102509081363678, 4.978619575500488 ], [ -0.10039319843053818, 5.0122785568237305 ], [ -0.06919693201780319, 5.0458903312683105 ], [ -0.03746507689356804, 5.079452991485596 ], [ -0.005227350629866123, 5.1129655838012695 ], [ 0.027485284954309464, 5.146428108215332 ], [ 0.06064034625887871, 5.179840087890625 ], [ 0.09420355409383774, 5.213201522827148 ], [ 0.12813864648342133, 5.246513843536377 ], [ 0.16240715980529785, 5.279778003692627 ], [ 0.19696833193302155, 5.312995910644531 ], [ 0.2317790538072586, 5.3461689949035645 ], [ 0.266793817281723, 5.379299640655518 ], [ 0.30196475982666016, 5.412391662597656 ], [ 0.3372417688369751, 5.4454474449157715 ], [ 0.37257257103919983, 5.478470325469971 ], [ 0.40790289640426636, 5.511464595794678 ], [ 0.4431767165660858, 5.544434547424316 ], [ 0.478336364030838, 5.577383518218994 ], [ 0.5133227705955505, 5.610316753387451 ], [ 0.5480756759643555, 5.643238544464111 ], [ 0.5825338363647461, 5.676154136657715 ], [ 0.6166353225708008, 5.709067344665527 ], [ 0.6503174901008606, 5.741983413696289 ], [ 0.6835171580314636, 5.774907112121582 ], [ 0.7161709666252136, 5.8078436851501465 ], [ 0.7482153177261353, 5.840796947479248 ], [ 0.7795863747596741, 5.873773097991943 ], [ 0.8102203011512756, 5.906775951385498 ], [ 0.8400532603263855, 5.939810276031494 ], [ 0.8690049648284912, 5.9728803634643555 ], [ 0.8969439268112183, 6.005988121032715 ], [ 0.9237036108970642, 6.039134979248047 ], [ 0.9491175413131714, 6.072321891784668 ], [ 0.9730191230773926, 6.105550289154053 ], [ 0.9952419996261597, 6.138820171356201 ], [ 1.0156195163726807, 6.172133445739746 ], [ 1.0339852571487427, 6.205490589141846 ], [ 1.0501729249954224, 6.238892555236816 ], [ 1.0640157461166382, 6.272340774536133 ], [ 1.075347661972046, 6.305835723876953 ], [ 1.084001898765564, 6.339378356933594 ], [ 1.0898123979568481, 6.372969627380371 ], [ 1.0927863121032715, 6.406609058380127 ], [ 1.093105435371399, 6.440292835235596 ], [ 1.090950846672058, 6.474018096923828 ], [ 
1.0865041017532349, 6.507782459259033 ], [ 1.079946517944336, 6.541581630706787 ], [ 1.0714592933654785, 6.575413227081299 ], [ 1.0612238645553589, 6.609274387359619 ], [ 1.0494211912155151, 6.643161773681641 ], [ 1.036232590675354, 6.677072525024414 ], [ 1.0218391418457031, 6.71100378036499 ], [ 1.006421685218811, 6.7449517250061035 ], [ 0.9901613593101501, 6.778914451599121 ], [ 0.9732388854026794, 6.812887668609619 ], [ 0.9558353424072266, 6.846869468688965 ], [ 0.9380521178245544, 6.880856990814209 ], [ 0.9199115633964539, 6.91485071182251 ], [ 0.9014359712600708, 6.948850154876709 ], [ 0.8826476335525513, 6.98285436630249 ], [ 0.8635689616203308, 7.016862869262695 ], [ 0.8442226052284241, 7.050876140594482 ], [ 0.8246312141418457, 7.084892749786377 ], [ 0.8048177361488342, 7.118912696838379 ], [ 0.7848052978515625, 7.15293550491333 ], [ 0.7646171450614929, 7.186960697174072 ], [ 0.7442769408226013, 7.220987796783447 ], [ 0.7238084673881531, 7.255016326904297 ], [ 0.703235924243927, 7.289045810699463 ], [ 0.682583749294281, 7.323075771331787 ], [ 0.6618766784667969, 7.357105731964111 ], [ 0.6411397457122803, 7.391135215759277 ], [ 0.6203982830047607, 7.425163269042969 ], [ 0.5996780395507812, 7.4591898918151855 ], [ 0.5790049433708191, 7.4932146072387695 ], [ 0.5584054589271545, 7.5272369384765625 ], [ 0.5379061102867126, 7.56125545501709 ], [ 0.5175339579582214, 7.595271110534668 ], [ 0.4973162114620209, 7.629281997680664 ], [ 0.4772806167602539, 7.663288116455078 ], [ 0.457455039024353, 7.697288990020752 ], [ 0.43786779046058655, 7.731284141540527 ], [ 0.41854748129844666, 7.765272617340088 ], [ 0.3995230197906494, 7.799253940582275 ], [ 0.38082367181777954, 7.833227634429932 ], [ 0.3624790608882904, 7.867193222045898 ], [ 0.34451907873153687, 7.901149749755859 ], [ 0.32697397470474243, 7.935096263885498 ], [ 0.3098742961883545, 7.969033241271973 ], [ 0.2932509779930115, 8.002959251403809 ], [ 0.2771351933479309, 8.036873817443848 ], [ 0.2615584135055542, 8.070775985717773 ], [ 0.24655242264270782, 8.10466480255127 ], [ 0.23214924335479736, 8.138541221618652 ], [ 0.21838118135929108, 8.172403335571289 ], [ 0.20528072118759155, 8.206250190734863 ], [ 0.19288058578968048, 8.240081787109375 ], [ 0.18121366202831268, 8.273897171020508 ], [ 0.17031297087669373, 8.307695388793945 ], [ 0.1602116823196411, 8.341476440429688 ], [ 0.15094305574893951, 8.375238418579102 ], [ 0.14254039525985718, 8.408982276916504 ], [ 0.13503706455230713, 8.442705154418945 ], [ 0.12846647202968597, 8.476408958435059 ], [ 0.12282804399728775, 8.510091781616211 ], [ 0.11808725446462631, 8.543754577636719 ], [ 0.11420957744121552, 8.577399253845215 ], [ 0.11116043478250504, 8.6110258102417 ], [ 0.10890527069568634, 8.644634246826172 ], [ 0.10740949213504791, 8.678226470947266 ], [ 0.10663850605487823, 8.711803436279297 ], [ 0.1065577045083046, 8.74536418914795 ], [ 0.10713250190019608, 8.778911590576172 ], [ 0.10832829773426056, 8.812445640563965 ], [ 0.11011053621768951, 8.845966339111328 ], [ 0.112444669008255, 8.879474639892578 ], [ 0.11529617011547089, 8.912972450256348 ], [ 0.11863056570291519, 8.946459770202637 ], [ 0.12241341173648834, 8.979937553405762 ], [ 0.12661030888557434, 9.013405799865723 ], [ 0.1311868578195572, 9.046866416931152 ], [ 0.13610877096652985, 9.080318450927734 ], [ 0.14134173095226288, 9.113764762878418 ], [ 0.14685149490833282, 9.147205352783203 ], [ 0.15260380506515503, 9.18064022064209 ], [ 0.158564493060112, 9.214071273803711 ], [ 0.16469934582710266, 9.24749755859375 ], [ 
0.17097420990467072, 9.280921936035156 ], [ 0.17735493183135986, 9.314343452453613 ], [ 0.1838073432445526, 9.347764015197754 ], [ 0.19029729068279266, 9.381183624267578 ], [ 0.19679751992225647, 9.414603233337402 ], [ 0.20329780876636505, 9.448022842407227 ], [ 0.2097981721162796, 9.481443405151367 ], [ 0.21629860997200012, 9.514863014221191 ], [ 0.22279909253120422, 9.548283576965332 ], [ 0.2292996346950531, 9.581703186035156 ], [ 0.23580022156238556, 9.615123748779297 ], [ 0.2423008531332016, 9.648544311523438 ], [ 0.24880154430866241, 9.681964874267578 ], [ 0.2553022503852844, 9.715385437011719 ], [ 0.2618030309677124, 9.74880599975586 ], [ 0.2683038115501404, 9.7822265625 ], [ 0.27480462193489075, 9.815648078918457 ], [ 0.2813054919242859, 9.849068641662598 ], [ 0.28780636191368103, 9.882490158081055 ], [ 0.29430726170539856, 9.915910720825195 ], [ 0.3008081614971161, 9.949331283569336 ], [ 0.307309091091156, 9.982752799987793 ], [ 0.3138100206851959, 10.01617431640625 ], [ 0.3203109800815582, 10.04959487915039 ], [ 0.32681193947792053, 10.083016395568848 ], [ 0.33331289887428284, 10.116436958312988 ], [ 0.33981388807296753, 10.149858474731445 ], [ 0.34631484746932983, 10.183279991149902 ], [ 0.3528158366680145, 10.216700553894043 ], [ 0.3593168258666992, 10.2501220703125 ], [ 0.3658177852630615, 10.283543586730957 ], [ 0.3723187744617462, 10.316965103149414 ], [ 0.3804450035095215, 10.35874080657959 ], [ 0.3853207528591156, 10.383807182312012 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/05_root_path/meta.json ================================================ { "text": "A person is casually walking forward slowly", "duration": 10.0, "num_samples": 1, "seed": 42, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/06_root_waypoints/constraints.json ================================================ [ { "type": "root2d", "frame_indices": [ 0, 90, 180 ], "smooth_root_2d": [ [ 0.0, -0.013232914730906487 ], [ -1.1690130233764648, 1.5332785844802856 ], [ 0.738669753074646, 1.4469488859176636 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/06_root_waypoints/meta.json ================================================ { "text": "A person is doing a hip hop dance while moving around", "duration": 6.033333333333333, "num_samples": 1, "seed": 42, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/07_mixed_constraints/constraints.json ================================================ [ { "type": "fullbody", "frame_indices": [ 108 ], "local_joints_rot": [ [ [ -0.035887543112039566, -0.02776639349758625, -0.005372282117605209 ], [ 0.06515975296497345, -0.010784560814499855, 0.006556123960763216 ], [ -0.06292378902435303, -0.05156821012496948, -0.009085050784051418 ], [ 0.11570766568183899, -0.0793282613158226, -0.03867234289646149 ], [ 0.09106606245040894, 0.06571822613477707, 0.002558206906542182 ], [ -0.06086159870028496, 0.10295507311820984, 0.02592187374830246 ], [ -0.15437740087509155, 0.16596992313861847, 0.009326435625553131 ], [ -0.0005251984694041312, 0.0018051519291475415, -9.946066711563617e-05 ], [ -0.184775248169899, -0.064349465072155, 0.00573313795030117 ], [ -0.18454650044441223, 
0.068090058863163, -0.005659883841872215 ], [ 0.20501427352428436, -0.14578332006931305, -0.04773213341832161 ], [ 0.26504039764404297, -0.16855353116989136, -1.0829373598098755 ], [ 0.006512798834592104, -0.6961542367935181, -0.011537229642271996 ], [ 0.07062757760286331, 0.03925099968910217, -0.027518808841705322 ], [ 0.14896969497203827, 0.29287680983543396, -0.2919791340827942 ], [ 0.009383739903569221, 0.0508926659822464, -1.056564450263977 ], [ 0.11172245442867279, 0.12029653787612915, -0.12930497527122498 ], [ -0.41130027174949646, -0.5924108028411865, -0.0006285393028520048 ], [ 0.006594705395400524, 0.4732210040092468, -0.002528452081605792 ], [ -0.32021215558052063, -0.25638389587402344, -0.3734903335571289 ], [ 0.09024477005004883, -0.2926441431045532, 0.2660353481769562 ], [ -0.09575983881950378, -0.055268142372369766, 0.8844737410545349 ], [ -0.0118059441447258, 0.07546520978212357, 0.0746397078037262 ], [ 0.8310757875442505, -0.012923321686685085, 0.004925338551402092 ], [ 0.03474503755569458, -0.23956389725208282, -0.16712959110736847 ], [ -0.09206951409578323, -0.03187529370188713, 0.027407124638557434 ], [ -0.2677958309650421, 0.11606352031230927, 0.036957308650016785 ], [ 0.394832044839859, -0.0007178321247920394, 0.0004849981633014977 ], [ -0.09032224863767624, -0.14483025670051575, -0.015989331528544426 ], [ -0.0217722300440073, 0.01900928094983101, -0.025495363399386406 ] ] ], "root_positions": [ [ -0.09470777958631516, 0.9947724342346191, -3.980208396911621 ] ], "smooth_root_2d": [ [ -0.09470777958631516, -3.980208396911621 ] ] }, { "type": "root2d", "frame_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151 ], "smooth_root_2d": [ [ -0.022358937188982964, 0.03532936051487923 ], [ -0.024468135088682175, -0.0013195642968639731 ], [ -0.02657654881477356, -0.037969205528497696 ], [ -0.028683679178357124, -0.07462010532617569 ], [ -0.030789025127887726, -0.11127285659313202 ], [ -0.032892078161239624, -0.14792808890342712 ], [ -0.03499194607138634, -0.184586301445961 ], [ -0.03708736225962639, -0.2212478667497635 ], [ -0.03917701542377472, -0.25791314244270325 ], [ -0.04125956818461418, -0.2945826053619385 ], [ -0.04333365708589554, -0.3312567174434662 ], [ -0.045397885143756866, -0.3679359555244446 ], [ -0.04745082929730415, -0.4046209156513214 ], [ -0.04949106276035309, -0.44131216406822205 ], [ -0.05151714012026787, -0.4780103266239166 ], [ -0.05352761223912239, -0.5147159695625305 ], [ -0.05552104488015175, -0.551429808139801 ], [ -0.05749599635601044, -0.5881525278091431 ], [ -0.059451062232255936, -0.6248847842216492 ], [ -0.061384834349155426, -0.6616273522377014 ], [ -0.06329593807458878, -0.6983808875083923 ], [ -0.06518300622701645, -0.7351461052894592 ], [ -0.06704472005367279, -0.7719237804412842 ], [ -0.06887973845005035, -0.8087146878242493 ], [ -0.07068677246570587, -0.8455195426940918 ], [ -0.07246451079845428, -0.8823391795158386 ], [ -0.07421167194843292, 
-0.9191742539405823 ], [ -0.07592695951461792, -0.9560256004333496 ], [ -0.07760907709598541, -0.9928940534591675 ], [ -0.07925672084093094, -1.029780387878418 ], [ -0.08086856454610825, -1.0666853189468384 ], [ -0.0824432522058487, -1.1036096811294556 ], [ -0.08397942036390305, -1.1405543088912964 ], [ -0.08547566086053848, -1.1775201559066772 ], [ -0.08693055063486099, -1.2145079374313354 ], [ -0.08834262937307358, -1.2515183687210083 ], [ -0.08971039950847626, -1.2885526418685913 ], [ -0.09103234112262726, -1.3256113529205322 ], [ -0.09230689704418182, -1.362695574760437 ], [ -0.0935325101017952, -1.3998061418533325 ], [ -0.09470757842063904, -1.4369438886642456 ], [ -0.09583047777414322, -1.4741098880767822 ], [ -0.0968996062874794, -1.5113049745559692 ], [ -0.09791331738233566, -1.548530101776123 ], [ -0.09886999428272247, -1.58578622341156 ], [ -0.0997680053114891, -1.6230742931365967 ], [ -0.10060573369264603, -1.6603953838348389 ], [ -0.1013815775513649, -1.6977503299713135 ], [ -0.10209395736455917, -1.7351402044296265 ], [ -0.1027413085103035, -1.7725658416748047 ], [ -0.10332208126783371, -1.8100284337997437 ], [ -0.10383477061986923, -1.8475286960601807 ], [ -0.10427788645029068, -1.8850678205490112 ], [ -0.10464996099472046, -1.9226467609405518 ], [ -0.10494954138994217, -1.9602664709091187 ], [ -0.10517755895853043, -1.997925877571106 ], [ -0.10533731430768967, -2.0356218814849854 ], [ -0.10543208569288254, -2.0733516216278076 ], [ -0.10546516627073288, -2.111111879348755 ], [ -0.10543984919786453, -2.148899555206299 ], [ -0.10535937547683716, -2.1867120265960693 ], [ -0.10522699356079102, -2.224546194076538 ], [ -0.10504589974880219, -2.262399435043335 ], [ -0.10481927543878555, -2.3002686500549316 ], [ -0.10455025732517242, -2.338151216506958 ], [ -0.10424194484949112, -2.376044511795044 ], [ -0.10389743000268936, -2.4139459133148193 ], [ -0.10351976752281189, -2.451852560043335 ], [ -0.10311200469732285, -2.4897620677948 ], [ -0.10267717391252518, -2.5276718139648438 ], [ -0.10221832990646362, -2.5655791759490967 ], [ -0.10173854231834412, -2.6034812927246094 ], [ -0.10124091058969498, -2.64137601852417 ], [ -0.10072856396436691, -2.67926025390625 ], [ -0.100204698741436, -2.7171311378479004 ], [ -0.09967257082462311, -2.754986047744751 ], [ -0.09913549572229385, -2.7928221225738525 ], [ -0.09859687089920044, -2.8306362628936768 ], [ -0.09806016832590103, -2.8684253692626953 ], [ -0.09752892702817917, -2.906186103820801 ], [ -0.09700676798820496, -2.943915367126465 ], [ -0.09649737179279327, -2.98160982131958 ], [ -0.09600447863340378, -3.019265651702881 ], [ -0.09553186595439911, -3.056879758834839 ], [ -0.09508336335420609, -3.0944483280181885 ], [ -0.09466280788183212, -3.131967782974243 ], [ -0.09427405893802643, -3.1694345474243164 ], [ -0.09392096847295761, -3.2068448066711426 ], [ -0.09360739588737488, -3.244194984436035 ], [ -0.09333716332912445, -3.2814812660217285 ], [ -0.09311125427484512, -3.3187034130096436 ], [ -0.09292776882648468, -3.355863571166992 ], [ -0.0927848145365715, -3.3929643630981445 ], [ -0.09268050640821457, -3.4300084114074707 ], [ -0.09261301904916763, -3.4669981002807617 ], [ -0.09258053451776505, -3.5039358139038086 ], [ -0.09258133918046951, -3.5408236980438232 ], [ -0.09261377900838852, -3.5776638984680176 ], [ -0.0926763191819191, -3.6144583225250244 ], [ -0.09276753664016724, -3.6512088775634766 ], [ -0.09288612008094788, -3.687917470932007 ], [ -0.0930309146642685, -3.7245850563049316 ], [ -0.09320087730884552, -3.7612133026123047 ], [ 
-0.09339512139558792, -3.7978031635284424 ], [ -0.09361287951469421, -3.8343558311462402 ], [ -0.09385351091623306, -3.8708720207214355 ], [ -0.09411647915840149, -3.9073524475097656 ], [ -0.09440135210752487, -3.9437978267669678 ], [ -0.09470777958631516, -3.980208396911621 ], [ -0.09503547102212906, -4.016584873199463 ], [ -0.09538418799638748, -4.052927494049072 ], [ -0.09575372189283371, -4.089236736297607 ], [ -0.09614387899637222, -4.125512599945068 ], [ -0.0965544655919075, -4.1617560386657715 ], [ -0.09698529541492462, -4.197966575622559 ], [ -0.09743614494800568, -4.234145641326904 ], [ -0.09790677577257156, -4.27029275894165 ], [ -0.09839694201946259, -4.306408882141113 ], [ -0.09890634566545486, -4.342494487762451 ], [ -0.09943470358848572, -4.378549575805664 ], [ -0.09998169541358948, -4.41457462310791 ], [ -0.10054702311754227, -4.450570583343506 ], [ -0.10113038867712021, -4.486537456512451 ], [ -0.10173150897026062, -4.522475242614746 ], [ -0.1023501306772232, -4.558384895324707 ], [ -0.10298605263233185, -4.594265937805176 ], [ -0.10363911837339401, -4.6301188468933105 ], [ -0.10430921614170074, -4.665942668914795 ], [ -0.10499630123376846, -4.701738357543945 ], [ -0.10570038110017776, -4.737504482269287 ], [ -0.10642150044441223, -4.7732415199279785 ], [ -0.10715975612401962, -4.808948040008545 ], [ -0.10791526734828949, -4.844624042510986 ], [ -0.10868816822767258, -4.880269527435303 ], [ -0.10947857797145844, -4.915882587432861 ], [ -0.11028657108545303, -4.9514641761779785 ], [ -0.11111218482255936, -4.98701286315918 ], [ -0.11195536702871323, -5.022529602050781 ], [ -0.1128159612417221, -5.058013439178467 ], [ -0.11369368433952332, -5.093465328216553 ], [ -0.11458808928728104, -5.128885746002197 ], [ -0.11549859493970871, -5.164275646209717 ], [ -0.11642441153526306, -5.199635982513428 ], [ -0.11736457794904709, -5.234969615936279 ], [ -0.11831795424222946, -5.270277500152588 ], [ -0.11928320676088333, -5.305562496185303 ], [ -0.12025882303714752, -5.340827941894531 ], [ -0.12124315649271011, -5.3760762214660645 ], [ -0.12223441153764725, -5.41131067276001 ], [ -0.12323068082332611, -5.446536064147949 ], [ -0.12448007613420486, -5.4905595779418945 ], [ -0.1252303272485733, -5.516972541809082 ] ] } ] ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/07_mixed_constraints/meta.json ================================================ { "text": "A person walking backward points to the right side with their right hand", "duration": 5.066666666666666, "num_samples": 1, "seed": 49, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/demo/examples/kimodo-soma-rp/08_stylized_text/meta.json ================================================ { "text": "A zombie with their left arm extended forward walks with an uneven gait at a slow pace.", "duration": 4.033333333333333, "num_samples": 1, "seed": 42, "diffusion_steps": 100, "cfg": { "enabled": true, "text_weight": 2.0, "constraint_weight": 2.0 } } ================================================ FILE: kimodo/assets/skeletons/g1skel34/xml/g1.xml ================================================ > ================================================ FILE: kimodo/assets/skeletons/somaskel77/somaskel77_standard_tpose.bvh ================================================ HIERARCHY ROOT Root { OFFSET 0.0 0.0 0.0 CHANNELS 6 Xposition Yposition Zposition Zrotation Yrotation Xrotation 
JOINT Hips { OFFSET 0.0 100.0 0.0 CHANNELS 6 Xposition Yposition Zposition Zrotation Yrotation Xrotation JOINT Spine1 { OFFSET -0.013727 5.003763 -0.053727 CHANNELS 3 Zrotation Yrotation Xrotation JOINT Spine2 { OFFSET -0.0 7.125301 -0.029825 CHANNELS 3 Zrotation Yrotation Xrotation JOINT Chest { OFFSET -1e-06 7.550063 -0.815971 CHANNELS 3 Zrotation Yrotation Xrotation JOINT Neck1 { OFFSET -0.181677 26.311295 -0.553348 CHANNELS 3 Zrotation Yrotation Xrotation JOINT Neck2 { OFFSET -3e-06 7.709397 2.302585 CHANNELS 3 Zrotation Yrotation Xrotation JOINT Head { OFFSET -5e-06 6.128916 1.953709 CHANNELS 3 Zrotation Yrotation Xrotation JOINT HeadEnd { OFFSET 0.003598 16.065403 -1.835379 CHANNELS 3 Zrotation Yrotation Xrotation } JOINT Jaw { OFFSET 0.002637 0.475592 3.094941 CHANNELS 3 Zrotation Yrotation Xrotation } JOINT LeftEye { OFFSET 3.206381 5.380205 7.586883 CHANNELS 3 Zrotation Yrotation Xrotation } JOINT RightEye { OFFSET -3.22244 5.361869 7.558234 CHANNELS 3 Zrotation Yrotation Xrotation } } } } JOINT LeftShoulder { OFFSET 1.621652 23.237164 5.113413 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftArm { OFFSET 14.919846 2e-06 -5.502326 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftForeArm { OFFSET 28.739307 0.0 -0.002588 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHand { OFFSET 27.093981 -1e-06 0.002609 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandThumb1 { OFFSET 2.276482 -1.392045 3.191413 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandThumb2 { OFFSET 4.012836 -1.828127 1.641654 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandThumb3 { OFFSET 2.798515 0.0 -3e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandThumbEnd { OFFSET 3.180793 -4e-06 4e-06 CHANNELS 3 Zrotation Yrotation Xrotation } } } } JOINT LeftHandIndex1 { OFFSET 3.247555 -0.531998 2.296169 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandIndex2 { OFFSET 6.364578 0.01206 0.1786 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandIndex3 { OFFSET 3.662364 0.0 0.0 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandIndex4 { OFFSET 2.329242 4e-06 4e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandIndexEnd { OFFSET 2.759615 -0.180537 -0.113024 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT LeftHandMiddle1 { OFFSET 3.163495 0.240981 1.000332 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandMiddle2 { OFFSET 6.19078 -0.259278 -1.002548 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandMiddle3 { OFFSET 4.35652 -4e-06 -1e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandMiddle4 { OFFSET 2.996877 -8e-06 0.0 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandMiddleEnd { OFFSET 2.304287 -0.294569 -0.031741 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT LeftHandRing1 { OFFSET 2.882643 -0.053652 -0.322543 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandRing2 { OFFSET 5.854541 -0.486202 -1.373841 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandRing3 { OFFSET 4.350578 0.0 3e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandRing4 { OFFSET 2.651321 7e-06 2e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandRingEnd { OFFSET 1.936105 0.077687 -7.1e-05 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT LeftHandPinky1 { OFFSET 2.8655 -0.310005 -1.600378 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandPinky2 { OFFSET 5.087849 -1.331141 -1.77123 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandPinky3 { OFFSET 3.070974 4e-06 0.0 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandPinky4 
{ OFFSET 1.549672 0.0 1e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftHandPinkyEnd { OFFSET 1.944893 -0.157802 0.057219 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } } } } } JOINT RightShoulder { OFFSET -1.380118 23.180309 5.214158 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightArm { OFFSET -15.037196 1.2e-05 -5.545604 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightForeArm { OFFSET -28.736639 2e-06 -0.002597 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHand { OFFSET -27.133619 -0.0 0.002613 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandThumb1 { OFFSET -2.274032 -1.383988 3.163127 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandThumb2 { OFFSET -4.011429 -1.827466 1.640914 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandThumb3 { OFFSET -2.794935 -4e-06 -3e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandThumbEnd { OFFSET -3.183852 4e-06 1e-06 CHANNELS 3 Zrotation Yrotation Xrotation } } } } JOINT RightHandIndex1 { OFFSET -3.253266 -0.520057 2.282866 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandIndex2 { OFFSET -6.341917 0.012471 0.178266 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandIndex3 { OFFSET -3.654871 -8e-06 -0.0 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandIndex4 { OFFSET -2.327586 0.0 1e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandIndexEnd { OFFSET -2.76179 -0.180656 -0.113078 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT RightHandMiddle1 { OFFSET -3.168106 0.246593 1.00103 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandMiddle2 { OFFSET -6.180828 -0.258836 -1.000895 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandMiddle3 { OFFSET -4.348901 0.0 -0.0 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandMiddle4 { OFFSET -3.00024 -4e-06 -2e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandMiddleEnd { OFFSET -2.30252 -0.29437 -0.031706 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT RightHandRing1 { OFFSET -2.88569 -0.067952 -0.308858 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandRing2 { OFFSET -5.854198 -0.48613 -1.373731 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandRing3 { OFFSET -4.33881 -4e-06 -0.0 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandRing4 { OFFSET -2.654903 -4e-06 4e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandRingEnd { OFFSET -1.933568 0.077527 -5.2e-05 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT RightHandPinky1 { OFFSET -2.866425 -0.342796 -1.584145 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandPinky2 { OFFSET -5.091371 -1.332055 -1.772385 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandPinky3 { OFFSET -3.062664 -4e-06 1e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandPinky4 { OFFSET -1.546529 4e-06 -2e-06 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightHandPinkyEnd { OFFSET -1.945119 -0.157718 0.057211 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } } } } } } } } JOINT LeftLeg { OFFSET 10.043214 -8.434526 2.595655 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftShin { OFFSET -1e-06 -43.221752 -0.802913 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftFoot { OFFSET 1e-06 -42.155094 -3.481523 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftToeBase { OFFSET 0.0 -5.059472 13.231529 CHANNELS 3 Zrotation Yrotation Xrotation JOINT LeftToeEnd { OFFSET -0.009607 -1.647619 6.513017 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } JOINT RightLeg { OFFSET -10.047278 -8.29526 2.620317 CHANNELS 3 Zrotation Yrotation 
Xrotation JOINT RightShin { OFFSET 1e-06 -43.362206 -0.805556 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightFoot { OFFSET 2e-06 -42.117393 -3.478398 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightToeBase { OFFSET -0.0 -5.079609 13.284196 CHANNELS 3 Zrotation Yrotation Xrotation JOINT RightToeEnd { OFFSET 0.009532 -1.634378 6.460591 CHANNELS 3 Zrotation Yrotation Xrotation } } } } } } } MOTION Frames: 1 Frame Time: 0.03333333333333333 0.0 0.0 0.0 0.0 0.0 0.0 0.0 100.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ================================================ FILE: kimodo/assets.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from pathlib import Path PACKAGE_ROOT = Path(__file__).resolve().parent ASSETS_ROOT = PACKAGE_ROOT / "assets" DEMO_ASSETS_ROOT = ASSETS_ROOT / "demo" DEMO_EXAMPLES_ROOT = DEMO_ASSETS_ROOT / "examples" SKELETONS_ROOT = ASSETS_ROOT / "skeletons" SOMA_ASSETS_ROOT = ASSETS_ROOT / "SOMA" def skeleton_asset_path(*parts: str) -> Path: return SKELETONS_ROOT.joinpath(*parts) def demo_asset_path(*parts: str) -> Path: return DEMO_ASSETS_ROOT.joinpath(*parts) ================================================ FILE: kimodo/constraints.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Constraint sets for conditioning motion generation (root 2D, full body, end-effectors).""" from typing import Optional, Union import torch from torch import Tensor from kimodo.motion_rep.feature_utils import compute_heading_angle from kimodo.skeleton import SkeletonBase, SOMASkeleton30, SOMASkeleton77 from kimodo.tools import ensure_batched, load_json, save_json from .geometry import axis_angle_to_matrix, matrix_to_axis_angle def _convert_constraint_local_rots_to_skeleton(local_rot_mats: Tensor, skeleton: SkeletonBase) -> Tensor: """Convert loaded local rotation matrices to match the skeleton's joint count. Handles SOMA 30↔77: constraint files may have been saved with 30 or 77 joints while the session skeleton (e.g. from the SOMA30 model) uses SOMASkeleton77. 
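Any other joint-count mismatch raises a ValueError, as only the SOMA 30↔77 mapping is implemented.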
""" n_joints = local_rot_mats.shape[-3] skeleton_joints = skeleton.nbjoints if n_joints == skeleton_joints: return local_rot_mats if n_joints == 77 and skeleton_joints == 30 and isinstance(skeleton, SOMASkeleton30): return skeleton.from_SOMASkeleton77(local_rot_mats) if n_joints == 30 and skeleton_joints == 77 and isinstance(skeleton, SOMASkeleton77): skel30 = SOMASkeleton30() return skel30.to_SOMASkeleton77(local_rot_mats) raise ValueError( f"Constraint joint count ({n_joints}) does not match skeleton joint count " f"({skeleton_joints}). Only SOMA 30↔77 conversion is supported." ) def create_pairs(tensor_A: Tensor, tensor_B: Tensor) -> Tensor: """Form all (a, b) pairs from two 1D tensors; output shape (len(A)*len(B), 2).""" pairs = torch.stack( ( tensor_A[:, None].expand(-1, len(tensor_B)), tensor_B.expand(len(tensor_A), -1), ), dim=-1, ).reshape(-1, 2) return pairs def compute_global_heading(global_joints_positions: Tensor, skeleton: SkeletonBase) -> Tensor: """Compute global root heading (cos, sin) from global joint positions using skeleton.""" root_heading_angle = compute_heading_angle(global_joints_positions, skeleton) global_root_heading = torch.stack([torch.cos(root_heading_angle), torch.sin(root_heading_angle)], dim=-1) return global_root_heading def _tensor_to( t: Tensor, device: Optional[Union[str, torch.device]] = None, dtype: Optional[torch.dtype] = None, ) -> Tensor: """Move tensor to device and/or dtype. Returns same tensor if no args. """ if device is not None and dtype is not None: return t.to(device=device, dtype=dtype) if device is not None: return t.to(device=device) if dtype is not None: return t.to(dtype=dtype) return t class Root2DConstraintSet: """Constraint set fixing root (x, z) trajectory and optionally global heading on given frames.""" name = "root2d" def __init__( self, skeleton: SkeletonBase, frame_indices: Tensor, smooth_root_2d: Tensor, to_crop: bool = False, global_root_heading: Optional[Tensor] = None, ) -> None: self.skeleton = skeleton # if we pass the full smooth root 3D as input if smooth_root_2d.shape[-1] == 3: smooth_root_2d = smooth_root_2d[..., [0, 1]] if to_crop: smooth_root_2d = smooth_root_2d[frame_indices] if global_root_heading is not None: global_root_heading = global_root_heading[frame_indices] else: assert len(smooth_root_2d) == len( frame_indices ), "The number of smooth root 2d should be match the number of frames" if global_root_heading is not None: assert len(global_root_heading) == len( frame_indices ), "The number of global root heading should be match the number of frames" self.smooth_root_2d = smooth_root_2d self.global_root_heading = global_root_heading self.frame_indices = frame_indices def update_constraints(self, data_dict: dict, index_dict: dict) -> None: """Append this constraint's smooth_root_2d (and optional global_root_heading) to data/index dicts.""" data_dict["smooth_root_2d"].append(self.smooth_root_2d) index_dict["smooth_root_2d"].append(self.frame_indices) if self.global_root_heading is not None: # constraint the global heading data_dict["global_root_heading"].append(self.global_root_heading) index_dict["global_root_heading"].append(self.frame_indices) def crop_move(self, start: int, end: int) -> "Root2DConstraintSet": """Return a new constraint set for the cropped frame range [start, end).""" mask = (self.frame_indices >= start) & (self.frame_indices < end) if self.global_root_heading is not None: masked_global_root_heading = self.global_root_heading[mask] else: masked_global_root_heading = None return 
return Root2DConstraintSet( self.skeleton, self.frame_indices[mask] - start, self.smooth_root_2d[mask], global_root_heading=masked_global_root_heading, ) def get_save_info(self) -> dict: """Return a dict suitable for JSON serialization (frame_indices, smooth_root_2d, optional global_root_heading).""" out = { "type": self.name, "frame_indices": self.frame_indices, "smooth_root_2d": self.smooth_root_2d, } if self.global_root_heading is not None: out["global_root_heading"] = self.global_root_heading return out def to( self, device: Optional[Union[str, torch.device]] = None, dtype: Optional[torch.dtype] = None, ) -> "Root2DConstraintSet": self.smooth_root_2d = _tensor_to(self.smooth_root_2d, device, dtype) self.frame_indices = _tensor_to(self.frame_indices, device, dtype) if self.global_root_heading is not None: self.global_root_heading = _tensor_to(self.global_root_heading, device, dtype) if device is not None and hasattr(self.skeleton, "to"): self.skeleton = self.skeleton.to(device) return self @classmethod def from_dict(cls, skeleton: SkeletonBase, dico: dict) -> "Root2DConstraintSet": """Build a Root2DConstraintSet from a dict (e.g. loaded from JSON).""" device = skeleton.device if hasattr(skeleton, "device") else "cpu" if "global_root_heading" in dico: global_root_heading = torch.tensor(dico["global_root_heading"], device=device) else: global_root_heading = None return cls( skeleton, frame_indices=torch.tensor(dico["frame_indices"]), smooth_root_2d=torch.tensor(dico["smooth_root_2d"], device=device), global_root_heading=global_root_heading, ) class FullBodyConstraintSet: """Constraint set fixing full-body global positions and rotations on given keyframes.""" name = "fullbody" def __init__( self, skeleton: SkeletonBase, frame_indices: Tensor, global_joints_positions: Tensor, global_joints_rots: Tensor, smooth_root_2d: Optional[Tensor] = None, to_crop: bool = False, ): self.skeleton = skeleton self.frame_indices = frame_indices # if we pass the full smooth root 3D as input if smooth_root_2d is not None and smooth_root_2d.shape[-1] == 3: smooth_root_2d = smooth_root_2d[..., [0, 1]] if to_crop: global_joints_positions = global_joints_positions[frame_indices] global_joints_rots = global_joints_rots[frame_indices] if smooth_root_2d is not None: smooth_root_2d = smooth_root_2d[frame_indices] else: assert len(global_joints_positions) == len( frame_indices ), "The number of global positions should match the number of frames" assert len(global_joints_rots) == len( frame_indices ), "The number of global joint rotations should match the number of frames" if smooth_root_2d is not None: assert len(smooth_root_2d) == len( frame_indices ), "The number of smooth root 2d entries (if specified) should match the number of frames" if smooth_root_2d is None: # substitute the smooth root 2d with the real root smooth_root_2d = global_joints_positions[:, skeleton.root_idx, [0, 2]] # root y: from smooth or pelvis is the same self.root_y_pos = global_joints_positions[:, skeleton.root_idx, 1] self.global_joints_positions = global_joints_positions self.global_joints_rots = global_joints_rots self.global_root_heading = compute_global_heading(global_joints_positions, skeleton) self.smooth_root_2d = smooth_root_2d def update_constraints(self, data_dict: dict, index_dict: dict) -> None: """Append global positions, smooth root 2D, root y, and global heading to data/index dicts.""" nbjoints = self.skeleton.nbjoints indices_lst = create_pairs( self.frame_indices, torch.arange(nbjoints, device=self.frame_indices.device), )
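# create_pairs yields one (frame, joint) row per constrained value: with F keyframes on a J-joint skeleton,
# indices_lst has shape (F * J, 2), ordered frame-major, e.g. (f0, 0), (f0, 1), ..., (f0, J - 1), (f1, 0), ...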
data_dict["global_joints_positions"].append( self.global_joints_positions.reshape(-1, 3) ) # flatten the global positions index_dict["global_joints_positions"].append(indices_lst) # global rotations are not used here # as we use smooth root, also constraint the smooth root to get the same full body # maybe keep storing the hips offset, if we smooth it ourselves data_dict["smooth_root_2d"].append(self.smooth_root_2d) index_dict["smooth_root_2d"].append(self.frame_indices) # constraint the y pos of the root data_dict["root_y_pos"].append(self.root_y_pos) index_dict["root_y_pos"].append(self.frame_indices) # constraint the global heading data_dict["global_root_heading"].append(self.global_root_heading) index_dict["global_root_heading"].append(self.frame_indices) def crop_move(self, start: int, end: int) -> "FullBodyConstraintSet": """Return a new FullBodyConstraintSet for the cropped frame range [start, end).""" mask = (self.frame_indices >= start) & (self.frame_indices < end) return FullBodyConstraintSet( self.skeleton, self.frame_indices[mask] - start, self.global_joints_positions[mask], self.global_joints_rots[mask], self.smooth_root_2d[mask], ) def get_save_info(self) -> dict: """Return a dict for JSON save: type, frame_indices, local_joints_rot, root_positions, smooth_root_2d.""" local_joints_rot = self.skeleton.global_rots_to_local_rots(self.global_joints_rots) if isinstance(self.skeleton, SOMASkeleton30): local_joints_rot = self.skeleton.to_SOMASkeleton77(local_joints_rot) local_joints_rot = matrix_to_axis_angle(local_joints_rot) root_positions = self.global_joints_positions[:, self.skeleton.root_idx] return { "type": self.name, "frame_indices": self.frame_indices, "local_joints_rot": local_joints_rot, "root_positions": root_positions, "smooth_root_2d": self.smooth_root_2d, } def to( self, device: Optional[Union[str, torch.device]] = None, dtype: Optional[torch.dtype] = None, ) -> "FullBodyConstraintSet": self.frame_indices = _tensor_to(self.frame_indices, device, dtype) self.global_joints_positions = _tensor_to(self.global_joints_positions, device, dtype) self.global_joints_rots = _tensor_to(self.global_joints_rots, device, dtype) self.root_y_pos = _tensor_to(self.root_y_pos, device, dtype) self.global_root_heading = _tensor_to(self.global_root_heading, device, dtype) self.smooth_root_2d = _tensor_to(self.smooth_root_2d, device, dtype) if device is not None and hasattr(self.skeleton, "to"): self.skeleton = self.skeleton.to(device) return self @classmethod def from_dict(cls, skeleton: SkeletonBase, dico: dict) -> "FullBodyConstraintSet": """Build a FullBodyConstraintSet from a dict (e.g. 
loaded from JSON).""" frame_indices = torch.tensor(dico["frame_indices"]) device = skeleton.device if hasattr(skeleton, "device") else "cpu" local_rot = torch.tensor(dico["local_joints_rot"], device=device) local_rot_mats = axis_angle_to_matrix(local_rot) local_rot_mats = _convert_constraint_local_rots_to_skeleton(local_rot_mats, skeleton) global_joints_rots, global_joints_positions, _ = skeleton.fk( local_rot_mats, torch.tensor(dico["root_positions"], device=device), ) smooth_root_2d = None if "smooth_root_2d" in dico: smooth_root_2d = torch.tensor(dico["smooth_root_2d"], device=device) return cls( skeleton, frame_indices=frame_indices, global_joints_positions=global_joints_positions, global_joints_rots=global_joints_rots, smooth_root_2d=smooth_root_2d, ) class EndEffectorConstraintSet: """Constraint set fixing selected end-effector positions and rotations on given frames.""" name = "end-effector" def __init__( self, skeleton: SkeletonBase, frame_indices: Tensor, global_joints_positions: Tensor, global_joints_rots: Tensor, smooth_root_2d: Optional[Tensor], *, joint_names: list[str], to_crop: bool = False, ) -> None: self.skeleton = skeleton self.frame_indices = frame_indices self.joint_names = joint_names # joint_names are constant for all the frames rot_joint_names, pos_joint_names = self.skeleton.expand_joint_names(self.joint_names) # indexing works for motion_rep with smooth root only (contains pelvis index) self.pos_indices = torch.tensor([self.skeleton.bone_index[jname] for jname in pos_joint_names]) self.rot_indices = torch.tensor([self.skeleton.bone_index[jname] for jname in rot_joint_names]) # if we pass the full smooth root 3D as input if smooth_root_2d is not None and smooth_root_2d.shape[-1] == 3: smooth_root_2d = smooth_root_2d[..., [0, 1]] if to_crop: global_joints_positions = global_joints_positions[frame_indices] global_joints_rots = global_joints_rots[frame_indices] if smooth_root_2d is not None: smooth_root_2d = smooth_root_2d[frame_indices] else: assert len(global_joints_positions) == len( frame_indices ), "The number of global positions should be match the number of frames" assert len(global_joints_rots) == len( frame_indices ), "The number of global joint rotations should be match the number of frames" if smooth_root_2d is not None: assert len(smooth_root_2d) == len( frame_indices ), "The number of smooth root 2d (if specified) should be match the number of frames" if smooth_root_2d is None: # substitute the smooth root 2d with the real root smooth_root_2d = global_joints_positions[:, skeleton.root_idx, [0, 2]] # root y: from smooth or pelvis is the same self.root_y_pos = global_joints_positions[:, skeleton.root_idx, 1] self.global_joints_positions = global_joints_positions self.global_root_heading = compute_global_heading(global_joints_positions, skeleton) self.global_joints_rots = global_joints_rots self.smooth_root_2d = smooth_root_2d def update_constraints(self, data_dict: dict, index_dict: dict) -> None: """Append constrained joint positions/rots, smooth root 2D, root y, and heading to data/index dicts.""" crop_frames_indexing = torch.arange(len(self.frame_indices), device=self.frame_indices.device) # constraint positions pos_indices_real = create_pairs( self.frame_indices, self.pos_indices, ) pos_indices_crop = create_pairs( crop_frames_indexing, self.pos_indices, ) data_dict["global_joints_positions"].append(self.global_joints_positions[tuple(pos_indices_crop.T)]) index_dict["global_joints_positions"].append(pos_indices_real) # constraint rotations 
rot_indices_real = create_pairs( self.frame_indices, self.rot_indices, ) rot_indices_crop = create_pairs( crop_frames_indexing, self.rot_indices, ) data_dict["global_joints_rots"].append(self.global_joints_rots[tuple(rot_indices_crop.T)]) index_dict["global_joints_rots"].append(rot_indices_real) # as we use smooth root, also constrain the smooth root to get the same full body # maybe keep storing the hips offset, if we smooth it ourselves data_dict["smooth_root_2d"].append(self.smooth_root_2d) index_dict["smooth_root_2d"].append(self.frame_indices) # constrain the y pos of the root data_dict["root_y_pos"].append(self.root_y_pos) index_dict["root_y_pos"].append(self.frame_indices) # constrain the global heading data_dict["global_root_heading"].append(self.global_root_heading) index_dict["global_root_heading"].append(self.frame_indices) def crop_move(self, start: int, end: int) -> "EndEffectorConstraintSet": """Return a new EndEffectorConstraintSet for the cropped frame range [start, end).""" mask = (self.frame_indices >= start) & (self.frame_indices < end) cls = type(self) kwargs = {} if not hasattr(cls, "joint_names"): kwargs["joint_names"] = self.joint_names return cls( self.skeleton, self.frame_indices[mask] - start, self.global_joints_positions[mask], self.global_joints_rots[mask], self.smooth_root_2d[mask], **kwargs, ) def get_save_info(self) -> dict: """Return a dict for JSON save: type, frame_indices, local_joints_rot, root_positions, smooth_root_2d, joint_names.""" local_joints_rot = self.skeleton.global_rots_to_local_rots(self.global_joints_rots) if isinstance(self.skeleton, SOMASkeleton30): local_joints_rot = self.skeleton.to_SOMASkeleton77(local_joints_rot) local_joints_rot = matrix_to_axis_angle(local_joints_rot) root_positions = self.global_joints_positions[:, self.skeleton.root_idx] output = { "type": self.name, "frame_indices": self.frame_indices, "local_joints_rot": local_joints_rot, "root_positions": root_positions, "smooth_root_2d": self.smooth_root_2d, } if not hasattr(self.__class__, "joint_names"): # save the joint_names for this base class # but not for children output["joint_names"] = self.joint_names return output def to( self, device: Optional[Union[str, torch.device]] = None, dtype: Optional[torch.dtype] = None, ) -> "EndEffectorConstraintSet": self.frame_indices = _tensor_to(self.frame_indices, device, dtype) self.pos_indices = _tensor_to(self.pos_indices, device, dtype) self.rot_indices = _tensor_to(self.rot_indices, device, dtype) self.root_y_pos = _tensor_to(self.root_y_pos, device, dtype) self.global_joints_positions = _tensor_to(self.global_joints_positions, device, dtype) self.global_root_heading = _tensor_to(self.global_root_heading, device, dtype) self.global_joints_rots = _tensor_to(self.global_joints_rots, device, dtype) self.smooth_root_2d = _tensor_to(self.smooth_root_2d, device, dtype) if device is not None and hasattr(self.skeleton, "to"): self.skeleton = self.skeleton.to(device) return self
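# In crop_move, get_save_info and from_dict, hasattr(cls, "joint_names") distinguishes this generic base class,
# where joint_names is a per-instance argument that must be carried over, from the fixed subclasses below
# (LeftHand, RightHand, LeftFoot, RightFoot), whose joint_names are class attributes.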
loaded from JSON).""" frame_indices = torch.tensor(dico["frame_indices"]) device = skeleton.device if hasattr(skeleton, "device") else "cpu" local_rot = torch.tensor(dico["local_joints_rot"], device=device) local_rot_mats = axis_angle_to_matrix(local_rot) local_rot_mats = _convert_constraint_local_rots_to_skeleton(local_rot_mats, skeleton) global_joints_rots, global_joints_positions, _ = skeleton.fk( local_rot_mats, torch.tensor(dico["root_positions"], device=device), ) smooth_root_2d = None if "smooth_root_2d" in dico: smooth_root_2d = torch.tensor(dico["smooth_root_2d"], device=device) kwargs = {} if not hasattr(cls, "joint_names"): kwargs["joint_names"] = dico["joint_names"] return cls( skeleton, frame_indices=frame_indices, global_joints_positions=global_joints_positions, global_joints_rots=global_joints_rots, smooth_root_2d=smooth_root_2d, **kwargs, ) class LeftHandConstraintSet(EndEffectorConstraintSet): """End-effector constraint for the left hand only.""" name = "left-hand" joint_names: list[str] = ["LeftHand"] def __init__(self, *args, **kwargs: dict): super().__init__(*args, joint_names=self.joint_names, **kwargs) class RightHandConstraintSet(EndEffectorConstraintSet): """End-effector constraint for the right hand only.""" name = "right-hand" joint_names: list[str] = ["RightHand"] def __init__(self, *args, **kwargs: dict): super().__init__(*args, joint_names=self.joint_names, **kwargs) class LeftFootConstraintSet(EndEffectorConstraintSet): """End-effector constraint for the left foot only.""" name = "left-foot" joint_names: list[str] = ["LeftFoot"] def __init__(self, *args, **kwargs: dict): super().__init__(*args, joint_names=self.joint_names, **kwargs) class RightFootConstraintSet(EndEffectorConstraintSet): """End-effector constraint for the right foot only.""" name = "right-foot" joint_names: list[str] = ["RightFoot"] def __init__(self, *args, **kwargs: dict): super().__init__(*args, joint_names=self.joint_names, **kwargs) TYPE_TO_CLASS = { "root2d": Root2DConstraintSet, "fullbody": FullBodyConstraintSet, "left-hand": LeftHandConstraintSet, "right-hand": RightHandConstraintSet, "left-foot": LeftFootConstraintSet, "right-foot": RightFootConstraintSet, "end-effector": EndEffectorConstraintSet, } def load_constraints_lst( path_or_data: str | list, skeleton: SkeletonBase, device: Optional[Union[str, torch.device]] = None, dtype: Optional[torch.dtype] = None, ): """Load a list of constraints from JSON path or list of dicts. Args: path_or_data: Path to constraints.json or list of constraint dicts. skeleton: Skeleton instance (used for from_dict). device: If set, move all constraint tensors and skeleton to this device. dtype: If set, cast constraint tensors to this dtype. """ if isinstance(path_or_data, str): saved = load_json(path_or_data) else: saved = path_or_data constraints_lst = [] for el in saved: cls = TYPE_TO_CLASS[el["type"]] c = cls.from_dict(skeleton, el) if device is not None or dtype is not None: c.to(device=device, dtype=dtype) constraints_lst.append(c) return constraints_lst def save_constraints_lst(path: str, constraints_lst: list) -> list | None: """Save a list of constraint sets to a JSON file. Returns None if list is empty. """ if not constraints_lst: print("The constraints lst is empty. 
Skip saving") return to_save = [] def tensor_to_list(obj): """Recursively convert tensors to lists for JSON serialization.""" if isinstance(obj, Tensor): return obj.cpu().tolist() elif isinstance(obj, dict): return {k: tensor_to_list(v) for k, v in obj.items()} elif isinstance(obj, list): return [tensor_to_list(v) for v in obj] else: return obj for constraint in constraints_lst: constraint_info = constraint.get_save_info() # Convert all tensors to lists for JSON serialization constraint_info = tensor_to_list(constraint_info) to_save.append(constraint_info) save_json(path, to_save) print(f"Saved constraints to {path}") return to_save ================================================ FILE: kimodo/demo/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # ruff: noqa: I001 import argparse from kimodo.model import DEFAULT_MODEL from kimodo.model.registry import resolve_model_name from .app import Demo def main() -> None: parser = argparse.ArgumentParser(description="Run the kimodo demo UI.") parser.add_argument( "--model", type=str, default=DEFAULT_MODEL, help="Default model to load (e.g. Kimodo-SOMA-RP-v1, kimodo-soma-rp, or SOMA).", ) args = parser.parse_args() resolved = resolve_model_name(args.model, "Kimodo") demo = Demo(default_model_name=resolved) demo.run() if __name__ == "__main__": main() ================================================ FILE: kimodo/demo/__main__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Entry point for `python -m kimodo.demo`.""" from kimodo.demo import main if __name__ == "__main__": main() ================================================ FILE: kimodo/demo/app.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import base64 import os import shutil import threading import time from typing import Optional import numpy as np import torch import viser from kimodo.assets import DEMO_ASSETS_ROOT from kimodo.model.load_model import load_model from kimodo.model.registry import resolve_model_name from kimodo.skeleton import SkeletonBase, SOMASkeleton30 from kimodo.tools import load_json from kimodo.viz import viser_utils from kimodo.viz.viser_utils import ( Character, CharacterMotion, EEJointsKeyframeSet, FullbodyKeyframeSet, RootKeyframe2DSet, ) from viser.theme import TitlebarButton, TitlebarConfig, TitlebarImage from . 
import generation, ui from .config import ( DARK_THEME, DEFAULT_CUR_DURATION, DEFAULT_MODEL, DEFAULT_PLAYBACK_SPEED, DEFAULT_PROMPT, DEMO_UI_QUICK_START_MODAL_MD, EXAMPLES_ROOT_DIR, HF_MODE, LIGHT_THEME, MAX_ACTIVE_USERS, MAX_DURATION, MAX_SESSION_MINUTES, MIN_DURATION, MODEL_EXAMPLES_DIRS, MODEL_NAMES, SERVER_NAME, SERVER_PORT, ) from .embedding_cache import CachedTextEncoder from .queue_manager import QueueManager, UserQueue from .state import ClientSession, ModelBundle class Demo: def __init__(self, default_model_name: str = DEFAULT_MODEL): self.device = "cuda:0" if torch.cuda.is_available() else "cpu" print(f"Using device: {self.device}") self.models: dict[str, ModelBundle] = {} self._text_encoder = None resolved = resolve_model_name(default_model_name, "Kimodo") if resolved not in MODEL_NAMES: raise ValueError(f"Unknown model '{default_model_name}'. Expected one of: {MODEL_NAMES}") self.default_model_name = resolved self.ensure_examples_layout() self.load_model(self.default_model_name) # Serialize GPU-bound generation across all clients self._generation_lock = threading.Lock() self._cuda_healthy = True # Per-client sessions self.client_sessions: dict[int, ClientSession] = {} self.start_direction_markers: dict[int, viser_utils.WaypointMesh] = {} self.grid_handles: dict[int, viser.GridHandle] = {} self.server = viser.ViserServer( host=SERVER_NAME, port=SERVER_PORT, label="Kimodo", enable_camera_keyboard_controls=False, # don't move the camera with the arrow keys ) self.server.scene.world_axes.visible = False # used for debugging self.server.scene.set_up_direction("+y") # Register callbacks for session handling self.server.on_client_connect(self.on_client_connect) self.server.on_client_disconnect(self.on_client_disconnect) # HF mode: queue and session limit if HF_MODE: self.user_queue = UserQueue(MAX_ACTIVE_USERS, MAX_SESSION_MINUTES) self.queue_manager = QueueManager( queue=self.user_queue, server=self.server, setup_demo_for_client=self._setup_demo_for_client, cleanup_session=self._cleanup_session_for_client, ) else: self.user_queue = None self.queue_manager = None # create grid and floor self.floor_len = 20.0 # meters def ensure_examples_layout(self) -> None: os.makedirs(EXAMPLES_ROOT_DIR, exist_ok=True) for model_dir in MODEL_EXAMPLES_DIRS.values(): os.makedirs(model_dir, exist_ok=True) for entry in os.listdir(EXAMPLES_ROOT_DIR): if entry in MODEL_EXAMPLES_DIRS: continue src = os.path.join(EXAMPLES_ROOT_DIR, entry) if not os.path.isdir(src): continue dst = os.path.join( MODEL_EXAMPLES_DIRS.get(DEFAULT_MODEL, next(iter(MODEL_EXAMPLES_DIRS.values()))), entry, ) if not os.path.exists(dst): shutil.move(src, dst) def get_examples_base_dir(self, model_name: str, absolute: bool = True) -> str: return MODEL_EXAMPLES_DIRS[model_name] def load_model(self, model_name: str) -> ModelBundle: if model_name in self.models: return self.models[model_name] print(f"Loading model {model_name}...") try: model = load_model( modelname=model_name, device=self.device, text_encoder=self._text_encoder, ) except Exception as e: print(f"Error loading model: {e}\nMake sure text encoder server is running!") raise e if hasattr(model, "text_encoder"): if self._text_encoder is None: self._text_encoder = model.text_encoder model.text_encoder = CachedTextEncoder(model.text_encoder, model_name=model_name) skeleton = model.motion_rep.skeleton if isinstance(skeleton, SOMASkeleton30): skeleton = skeleton.somaskel77.to(model.device) bundle = ModelBundle( model=model, motion_rep=model.motion_rep, skeleton=skeleton, 
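# for SOMA30 models this is the expanded 77-joint SOMASkeleton77 (see the isinstance check above), so downstream visualization gets the full rig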
model_fps=model.motion_rep.fps, ) self.models[model_name] = bundle print(f"Model {model_name} loaded successfully") self.prewarm_embedding_cache(model_name, bundle.model) return bundle def prewarm_embedding_cache(self, model_name: str, model: object) -> None: encoder = getattr(model, "text_encoder", None) if not isinstance(encoder, CachedTextEncoder): return prompt_set = set() prompt_set.add(DEFAULT_PROMPT) examples_dir = MODEL_EXAMPLES_DIRS.get(model_name) if examples_dir and os.path.isdir(examples_dir): for entry in os.listdir(examples_dir): example_dir = os.path.join(examples_dir, entry) if not os.path.isdir(example_dir): continue meta_path = os.path.join(example_dir, "meta.json") if not os.path.exists(meta_path): continue try: meta = load_json(meta_path) except Exception: continue for prompt in meta.get("prompts_text", []): if isinstance(prompt, str): prompt_set.add(prompt) if prompt_set: encoder.prewarm(list(prompt_set)) def build_constraint_tracks( self, client: viser.ClientHandle, skeleton: SkeletonBase ) -> dict[str, viser_utils.ConstraintSet]: return { "Full-Body": FullbodyKeyframeSet( name="Full-Body", server=client, skeleton=skeleton, ), "End-Effectors": EEJointsKeyframeSet( name="End-Effectors", server=client, skeleton=skeleton, ), "2D Root": RootKeyframe2DSet( name="2D Root", server=client, skeleton=skeleton, ), } def set_timeline_defaults(self, timeline, model_fps: float) -> None: timeline.set_defaults( default_text=DEFAULT_PROMPT, default_duration=int(DEFAULT_CUR_DURATION * model_fps - 1), min_duration=int(MIN_DURATION * model_fps - 1), # 2 seconds minimum, max_duration=int( MAX_DURATION * model_fps - 1 # - NB_TRANSITION_FRAMES ), # 10 seconds maximum, minus the transition frames, if needed default_num_frames_zoom=int(1.10 * 10 * model_fps), # a bit more than the max max_frames_zoom=1000, fps=model_fps, ) def _apply_constraint_overlay_visibility(self, session: ClientSession) -> None: """Apply show-all vs show-only-current-frame to constraint overlays.""" only_frame = session.frame_idx if session.show_only_current_constraint else None for constraint in session.constraints.values(): constraint.set_overlay_visibility(only_frame) def set_constraint_tracks_visible(self, session: ClientSession, visible: bool) -> None: timeline = session.client.timeline timeline_data = session.timeline_data if timeline_data.get("constraint_tracks_visible", True) == visible: return with timeline_data["keyframe_update_lock"]: if visible: for track_id, track_info in timeline_data["tracks"].items(): timeline.add_track( track_info["name"], track_type=track_info.get("track_type", "keyframe"), color=track_info.get("color"), height_scale=track_info.get("height_scale", 1.0), uuid=track_id, ) for keyframe_id, keyframe_data in timeline_data["keyframes"].items(): timeline.add_keyframe( track_id=keyframe_data["track_id"], frame=keyframe_data["frame"], value=keyframe_data.get("value"), opacity=keyframe_data.get("opacity", 1.0), locked=keyframe_data.get("locked", False), uuid=keyframe_id, ) for interval_id, interval_data in timeline_data["intervals"].items(): timeline.add_interval( track_id=interval_data["track_id"], start_frame=interval_data["start_frame_idx"], end_frame=interval_data["end_frame_idx"], value=interval_data.get("value"), opacity=interval_data.get("opacity", 1.0), locked=interval_data.get("locked", False), uuid=interval_id, ) else: for track_id in list(timeline_data["tracks"].keys()): timeline.remove_track(track_id) timeline_data["constraint_tracks_visible"] = visible def 
_cleanup_session_for_client(self, client_id: int) -> None: """Remove session and scene state for a client (e.g. on session expiry).""" if client_id in self.client_sessions: del self.client_sessions[client_id] self.start_direction_markers.pop(client_id, None) self.grid_handles.pop(client_id, None) def _setup_demo_for_client(self, client: viser.ClientHandle) -> None: """Initialize scene, GUI, and session state for a client (no modals).""" self.setup_scene(client) model_bundle = self.load_model(self.default_model_name) # Initialize each empty constraint track constraint_tracks = self.build_constraint_tracks(client, model_bundle.skeleton) # Create GUI elements for this client ( gui_elements, timeline_tracks, example_dict, gui_examples_dropdown, gui_save_example_path_text, gui_model_selector, ) = ui.create_gui( demo=self, client=client, model_name=self.default_model_name, model_fps=model_bundle.model_fps, ) timeline_data = { "tracks": timeline_tracks, "tracks_ids": {val["name"]: key for key, val in timeline_tracks.items()}, "keyframes": {}, "intervals": {}, "keyframe_update_lock": threading.Lock(), "keyframe_move_timers": {}, "pending_keyframe_moves": {}, # keyframe_id -> new_frame "constraint_tracks_visible": True, "dense_path_after_release_timer": None, } # Initialize session state cur_duration = DEFAULT_CUR_DURATION max_frame_idx = int(cur_duration * model_bundle.model_fps - 1) session = ClientSession( client=client, gui_elements=gui_elements, motions={}, constraints=constraint_tracks, timeline_data=timeline_data, frame_idx=0, playing=False, playback_speed=DEFAULT_PLAYBACK_SPEED, cur_duration=cur_duration, max_frame_idx=max_frame_idx, updating_motions=False, edit_mode=False, model_name=self.default_model_name, model_fps=model_bundle.model_fps, skeleton=model_bundle.skeleton, motion_rep=model_bundle.motion_rep, examples_base_dir=self.get_examples_base_dir(self.default_model_name, absolute=True), example_dict=example_dict, gui_examples_dropdown=gui_examples_dropdown, gui_save_example_path_text=gui_save_example_path_text, gui_model_selector=gui_model_selector, ) self.client_sessions[client.client_id] = session # Initialize default character for this client self.add_character_motion(client, session.skeleton) def on_client_connect(self, client: viser.ClientHandle) -> None: """Initialize GUI and state for each new client.""" print(f"Client {client.client_id} connected") if HF_MODE and self.queue_manager is not None: self.queue_manager.on_client_connect(client) else: # Show quick start popup when a browser client connects (non-HF mode). 
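# The modal's save_choice key presumably persists the acknowledgement, so returning visitors who clicked the button skip it;
# in HF mode the queue manager drives setup through the _setup_demo_for_client callback wired in __init__ instead.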
with client.gui.add_modal( "Welcome — Quick Start", size="xl", show_close_button=True, save_choice="kimodo.demo.quick_start_ack", ) as modal: client.gui.add_markdown(DEMO_UI_QUICK_START_MODAL_MD) client.gui.add_button("Got it (don't remind me again)").on_click(lambda _event: modal.close()) self._setup_demo_for_client(client) def setup_scene(self, client: viser.ClientHandle) -> None: self.configure_theme(client) client.camera.position = np.array( [2.7417358737841426, 1.8790455698853281, 7.675741569777456], dtype=np.float64, ) client.camera.look_at = np.array([0.0, 0.0, 0.0], dtype=np.float64) client.camera.up_direction = np.array( [-1.1102230246251568e-16, 1.0, 1.3596310734468913e-32], dtype=np.float64, ) client.camera.fov = np.deg2rad(45.0) grid_handle = client.scene.add_grid( "/grid", width=self.floor_len, height=self.floor_len, wxyz=viser.transforms.SO3.from_x_radians(-np.pi / 2.0).wxyz, position=(0.0, 0.0001, 0.0), fade_distance=3 * self.floor_len, section_color=LIGHT_THEME["grid"], infinite_grid=True, ) self.grid_handles[client.client_id] = grid_handle # marker for origin origin_waypoint = viser_utils.WaypointMesh( "/origin_waypoint", client, position=np.array([0.0, 0.0, 0.0]), heading=np.array([0.0, 1.0]), color=(0, 0, 255), ) self.start_direction_markers[client.client_id] = origin_waypoint def on_client_disconnect(self, client: viser.ClientHandle) -> None: """Clean up when client disconnects.""" print(f"Client {client.client_id} disconnected") client_id = client.client_id if HF_MODE and self.queue_manager is not None: self.queue_manager.on_client_disconnect(client_id) self._cleanup_session_for_client(client_id) def set_start_direction_visible(self, client_id: int, visible: bool) -> None: marker = self.start_direction_markers.get(client_id) if marker is None: return marker.set_visible(visible) def client_active(self, client_id: int) -> bool: return client_id in self.client_sessions def add_character_motion( self, client: viser.ClientHandle, skeleton: SkeletonBase, joints_pos: Optional[torch.Tensor] = None, joints_rot: Optional[torch.Tensor] = None, foot_contacts: Optional[torch.Tensor] = None, ) -> None: client_id = client.client_id if not self.client_active(client_id): return session = self.client_sessions[client_id] ci = len(session.motions) character_name = f"character{ci}" # build character skeleton and skinning mesh if "g1" in session.model_name: mesh_mode = "g1_stl" elif "smplx" in session.model_name: mesh_mode = "smplx_skin" elif "soma" in session.model_name: if session.gui_elements.gui_use_soma_layer_checkbox.value: mesh_mode = "soma_layer_skin" else: mesh_mode = "soma_skin" else: raise ValueError("The model name is not recognized for skinning.") new_character = Character( character_name, client, skeleton, create_skeleton_mesh=True, create_skinned_mesh=True, visible_skeleton=False, # don't show immediately visible_skinned_mesh=False, # don't show immediately skinned_mesh_opacity=session.gui_elements.gui_viz_skinned_mesh_opacity_slider.value, show_foot_contacts=session.gui_elements.gui_viz_foot_contacts_checkbox.value, dark_mode=session.gui_elements.gui_dark_mode_checkbox.value, mesh_mode=mesh_mode, gui_use_soma_layer_checkbox=session.gui_elements.gui_use_soma_layer_checkbox, ) # if no motion given, initialize to character default (rest) pose for one frame init_joints_pos, init_joints_rot = new_character.get_pose() if joints_pos is None: joints_pos = init_joints_pos[None].repeat(session.max_frame_idx + 1, 1, 1) if joints_rot is None: joints_rot = 
init_joints_rot[None].repeat(session.max_frame_idx + 1, 1, 1, 1) new_motion = CharacterMotion(new_character, joints_pos, joints_rot, foot_contacts) # save the motion in our dict session.motions[character_name] = new_motion # put the character at the right frame new_motion.set_frame(session.frame_idx) # put them visible with a small delay # so that the set_frame function has time to finish def _set_visibility(): new_motion.character.set_skinned_mesh_visibility(session.gui_elements.gui_viz_skinned_mesh_checkbox.value) new_motion.character.set_skeleton_visibility(session.gui_elements.gui_viz_skeleton_checkbox.value) timer = threading.Timer( 0.2, # 0.2s delay _set_visibility, ) timer.start() def clear_motions(self, client_id: int) -> None: if not self.client_active(client_id): return session = self.client_sessions[client_id] for motion in list(session.motions.values()): motion.clear() session.motions.clear() def compute_model_constraints_lst( self, session: ClientSession, model_bundle: ModelBundle, num_frames: int, ): return generation.compute_model_constraints_lst(session, model_bundle, num_frames, self.device) def check_cuda_health(self) -> bool: """Check if CUDA is still functional. Trigger auto-restart if corrupted. """ if self.device == "cpu": return True try: torch.tensor([1.0], device=self.device) + torch.tensor([1.0], device=self.device) return True except RuntimeError as e: if "device-side assert" in str(e) or "CUDA error" in str(e): if self._cuda_healthy: self._cuda_healthy = False print("FATAL: CUDA context is corrupted (device-side assert). " "The process must be restarted.") self._trigger_restart() return False raise def _trigger_restart(self) -> None: """Exit the process so the HF Space (or systemd/Docker) can restart it.""" import sys print("Initiating automatic restart due to unrecoverable CUDA error...") sys.stdout.flush() sys.stderr.flush() os._exit(1) def generate( self, client: viser.ClientHandle, prompts: list[str], num_frames: list[int], num_samples: int, seed: int, diffusion_steps: int, cfg_weight: Optional[list[float]] = None, cfg_type: Optional[str] = None, postprocess_parameters: Optional[dict] = None, transitions_parameters: Optional[dict] = None, real_robot_rotations: bool = False, ) -> None: if not self._cuda_healthy: raise RuntimeError("CUDA is in a corrupted state. The space is restarting...") locked = self._generation_lock.acquire(blocking=False) if not locked: waiting_notif = client.add_notification( title="Waiting for GPU...", body="Another generation is in progress. 
Yours will start automatically.", loading=True, with_close_button=False, ) self._generation_lock.acquire() waiting_notif.remove() try: session = self.client_sessions[client.client_id] model_bundle = self.load_model(session.model_name) generation.generate( client=client, session=session, model_bundle=model_bundle, prompts=prompts, num_frames=num_frames, num_samples=num_samples, seed=seed, diffusion_steps=diffusion_steps, cfg_weight=cfg_weight, cfg_type=cfg_type, postprocess_parameters=postprocess_parameters, transitions_parameters=transitions_parameters, real_robot_rotations=real_robot_rotations, device=self.device, clear_motions=self.clear_motions, add_character_motion=self.add_character_motion, ) finally: self._generation_lock.release() def set_frame(self, client_id: int, frame_idx: int, update_timeline: bool = True): if not self.client_active(client_id): return session = self.client_sessions[client_id] session.frame_idx = frame_idx if update_timeline: session.client.timeline.set_current_frame(frame_idx) for motion in list(session.motions.values()): motion.set_frame(frame_idx) self._apply_constraint_overlay_visibility(session) def run(self) -> None: update_counter = 0 cuda_check_interval = 300 while True: last_update_time = time.time() if self.models: # the max playback speed is 2x the model fps (from gui_playback_speed_buttons) playback_fps = max(bundle.model_fps for bundle in self.models.values()) * 2.0 else: playback_fps = 60.0 # update each client session independently # copy to a list first to avoid changing size if client disconnects for client_id, session in list(self.client_sessions.items()): update_interval = int(playback_fps / (session.playback_speed * session.model_fps)) new_frame_idx = session.frame_idx if session.playing and update_counter % update_interval == 0: if session.frame_idx >= session.max_frame_idx: new_frame_idx = 0 else: new_frame_idx = session.frame_idx + 1 # make sure the client is still active before updating the frame if self.client_active(client_id): self.set_frame(client_id, new_frame_idx) if update_counter % cuda_check_interval == 0: self.check_cuda_health() time_remaining = max(0, 1.0 / playback_fps - (time.time() - last_update_time)) time.sleep(time_remaining) update_counter += 1 update_counter %= playback_fps # wrap around to 0 every second def configure_theme( self, client: viser.ClientHandle, dark_mode: bool = False, titlebar_dark_mode_checkbox_uuid: str | None = None, ): # Sync grid color with theme (light vs dark) theme = DARK_THEME if dark_mode else LIGHT_THEME grid_handle = self.grid_handles.get(client.client_id) if grid_handle is not None: grid_handle.section_color = theme["grid"] # # setup theme # buttons = ( TitlebarButton( text="Documentation", icon="Description", href="https://research.nvidia.com/labs/sil/projects/kimodo/docs/interactive_demo/index.html", ), TitlebarButton( text="Project Page", icon=None, href="https://research.nvidia.com/labs/sil/projects/kimodo/", ), TitlebarButton( text="Github", icon="GitHub", href="https://github.com/nv-tlabs/kimodo", ), ) assets_dir = DEMO_ASSETS_ROOT logo_light_path = assets_dir / "nvidia_logo.png" logo_dark_path = assets_dir / "nvidia_logo_dark.png" if logo_light_path.exists(): light_b64 = base64.standard_b64encode(logo_light_path.read_bytes()).decode("ascii") dark_b64 = ( base64.standard_b64encode(logo_dark_path.read_bytes()).decode("ascii") if logo_dark_path.exists() else None ) image = TitlebarImage( image_url_light=f"data:image/png;base64,{light_b64}", 
image_url_dark=(f"data:image/png;base64,{dark_b64}" if dark_b64 else None), image_alt="NVIDIA", href="https://www.nvidia.com/", ) else: image = None titlebar_theme = TitlebarConfig(buttons=buttons, image=image, title_text="Kimodo") client.gui.set_panel_label("Kimodo") client.gui.configure_theme( titlebar_content=titlebar_theme, control_layout="floating", # "floating", # ['floating', 'collapsible', 'fixed'] control_width="large", # ['small', 'medium', 'large'] dark_mode=dark_mode, show_logo=False, # hide viser logo on bottom left corner show_share_button=False, titlebar_dark_mode_checkbox_uuid=titlebar_dark_mode_checkbox_uuid, brand_color=(152, 189, 255), # (60, 131, 0), # (R, G, B) tuple ) ================================================ FILE: kimodo/demo/config.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import os from kimodo.assets import DEMO_EXAMPLES_ROOT from kimodo.model.registry import ( AVAILABLE_MODELS, DEFAULT_MODEL, FRIENDLY_NAMES, get_datasets, get_model_info, get_models_for_dataset_skeleton, get_short_key_from_display_name, get_skeleton_display_name, get_skeleton_display_names_for_dataset, get_skeleton_key_from_display_name, get_skeletons_for_dataset, get_versions_for_dataset_skeleton, resolve_to_short_key, ) SERVER_NAME = os.environ.get("SERVER_NAME", "0.0.0.0") SERVER_PORT = int(os.environ.get("SERVER_PORT", "7860")) HF_MODE = os.environ.get("HF_MODE", False) # HF mode: user queue and session limit (override via env in Spaces) MAX_ACTIVE_USERS = int(os.environ.get("MAX_ACTIVE_USERS", "5")) MAX_SESSION_MINUTES = float(os.environ.get("MAX_SESSION_MINUTES", "5.0")) DEFAULT_PLAYBACK_SPEED = 1.0 # default start duration is 6.0 sec, but model can handle up to 10 sec DEFAULT_CUR_DURATION = 6.0 DEFAULT_PROMPT = "A person walks forward." MIN_DURATION = 2.0 MAX_DURATION = 10.0 SHOW_TRANSITION_PARAMS = True INIT_POSTPROCESSING = True NB_TRANSITION_FRAMES = 5 LIGHT_THEME = dict( floor=(220, 220, 220), grid=(180, 180, 180), ) # Dark theme: slightly lighter grid and floor for better visibility and less flat black DARK_THEME = dict( floor=(48, 48, 52), grid=(105, 105, 110), ) EXAMPLES_ROOT_DIR = str(DEMO_EXAMPLES_ROOT) # Model list and paths from kimodo registry (all models: Kimodo + TMR) MODEL_NAMES = tuple(AVAILABLE_MODELS) MODEL_EXAMPLES_DIRS = {name: os.path.join(EXAMPLES_ROOT_DIR, name) for name in MODEL_NAMES} # Display labels for backward compatibility (short_key -> display name) MODEL_LABELS = {name: FRIENDLY_NAMES.get(name, f"Model ({name})") for name in MODEL_NAMES} MODEL_LABEL_TO_NAME = {label: name for name, label in MODEL_LABELS.items()} # ----------------------------------------------------------------------------- # Demo UI copy # ----------------------------------------------------------------------------- DEMO_UI_QUICK_START_CORE_MD = """ ### Camera - **Left-drag**: rotate - **Right-drag**: pan - **Scroll**: zoom ### Playback - **Space** to play/pause - **←/→** to step frames, or click the frame number. - **Scroll up/down** in the timeline: move left/right - **Shift + scroll** in the timeline: zoom in/out ### Prompts - **Double-click** a text prompt to edit it. - **Click and drag** the right edge of a prompt box to extend/shorten it. - **Click empty space** to add a prompt. - **Right-click** a prompt to delete it. 
### Generate
- Go to the **Generate** tab to modify options
- It is also possible to **load** examples
- Click **Generate** to generate a motion

### Constraints
- This is **optional**: it should be used after a first generation
- **Click** in the timeline tracks (Full-Body / 2D Root, etc.) to add a constraint.
- **Right-click** on a constraint to delete it.
- To **edit** a constraint:
  - Move playback to the target frame
  - Click **Enter Editing Mode** in the Constraints tab.
"""

DEMO_UI_QUICK_START_MODAL_MD = (
    DEMO_UI_QUICK_START_CORE_MD
    + """
See the **Instructions** tab for the full user manual.
"""
)

DEMO_UI_INSTRUCTIONS_TAB_MD = (
    """
## How to Use This Demo
"""
    + DEMO_UI_QUICK_START_CORE_MD
    + """
---

### Generating Motion (step-by-step)
1. **Edit the text prompts** in the timeline (e.g., "A person walks forward.")
2. **Modify the duration** by moving the right edge of each prompt (2–10 seconds)
3. **Add constraints** (optional) to control the motion:
   - Click **Enter Editing Mode** to adjust the character pose
   - Use the timeline to place keyframes or intervals in constraint tracks (see below)
4. **Click Generate** to create the motion
5. If generating multiple samples, **click on a mesh** to select which one to keep

### Timeline Editing

**Adding Constraints:**
1. Click anywhere on the timeline to add a keyframe at that frame. The keyframe is created based on the current character motion.
2. Ctrl/Cmd+click+drag to add an interval constraint, or expand a keyframe into an interval
3. Enter editing mode with the **Enter Editing Mode** button to adjust the character pose before/after adding constraints.

**Constraint Types:**
- **Full-Body**: constrains the entire character pose
- **2D Root**: constrains the character's path on the ground plane
  - Enable **Densify** to create a continuous path
- **End-Effectors**: constrains hand and foot positions
  - Use separate tracks for Left/Right Hand/Foot

**Moving & Deleting:**
- **Drag keyframes/intervals** to move them to different frames
- **Right-click** a keyframe or interval to delete it
- Use **Clear All Constraints** to remove everything

**Tips:**
- The posing skeleton becomes visible in editing mode for precise positioning
- Use **Snap to constraint** to align the current frame to a constraint

### Saving & Loading
You can save the current constraints or the current motion and load them later from the Load/Save menu.
Saving an **Example** will save the full constraints, motion, and generation metadata.

### Visualization Options
Switch to the **Visualize** tab to:
- Toggle mesh and skeleton visibility
- Adjust mesh opacity
- Show/hide foot contact indicators
- Switch between light and dark modes
"""
)


================================================
FILE: kimodo/demo/embedding_cache.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 import contextlib import contextvars import hashlib import json import os import threading import time from collections import OrderedDict from dataclasses import dataclass from typing import Iterable, Optional import numpy as np import torch from kimodo.sanitize import sanitize_texts _ACTIVE_SESSION = contextvars.ContextVar("kimodo_demo_active_session", default=None) @dataclass class CacheStats: hits: int = 0 misses: int = 0 disk_hits: int = 0 class EmbeddingCache: """Disk-backed text embedding cache with a small in-memory LRU.""" def __init__( self, *, model_name: str, encoder_id: str, base_dir: Optional[str] = None, max_mem_entries: int = 128, ) -> None: cache_root = base_dir or os.environ.get( "kimodo_EMBED_CACHE_DIR", os.path.join("~", ".cache", "kimodo_demo", "embeddings"), ) self.base_dir = os.path.expanduser(cache_root) self.model_name = model_name self.encoder_id = encoder_id self.max_mem_entries = max_mem_entries self.stats = CacheStats() self._lock = threading.Lock() self._mem_cache: OrderedDict[str, np.ndarray] = OrderedDict() self._index = {} self._index_loaded = False def _model_dir(self) -> str: return os.path.join(self.base_dir, self.model_name) def _index_path(self) -> str: return os.path.join(self._model_dir(), "index.json") def _prewarm_marker_path(self, key: str) -> str: return os.path.join(self._model_dir(), f"prewarm_{key}.json") def has_prewarm_marker(self, key: str) -> bool: return os.path.exists(self._prewarm_marker_path(key)) def write_prewarm_marker(self, key: str, *, prompt_count: int) -> None: os.makedirs(self._model_dir(), exist_ok=True) payload = {"prompt_count": prompt_count, "updated_at": time.time()} tmp_path = f"{self._prewarm_marker_path(key)}.tmp" with open(tmp_path, "w", encoding="utf-8") as f: json.dump(payload, f) os.replace(tmp_path, self._prewarm_marker_path(key)) def _load_index(self) -> None: if self._index_loaded: return index_path = self._index_path() if os.path.exists(index_path): try: with open(index_path, "r", encoding="utf-8") as f: self._index = json.load(f) except json.JSONDecodeError: self._index = {} self._index_loaded = True def _save_index(self) -> None: os.makedirs(self._model_dir(), exist_ok=True) tmp_path = f"{self._index_path()}.tmp" with open(tmp_path, "w", encoding="utf-8") as f: json.dump(self._index, f) os.replace(tmp_path, self._index_path()) def _make_key(self, text: str) -> str: key_src = f"{self.model_name}|{self.encoder_id}|{text}" return hashlib.sha256(key_src.encode("utf-8")).hexdigest() def _entry_path(self, key: str) -> str: return os.path.join(self._model_dir(), f"{key}.npy") def _mem_get(self, key: str) -> Optional[np.ndarray]: if key in self._mem_cache: self._mem_cache.move_to_end(key) return self._mem_cache[key] return None def _mem_put(self, key: str, value: np.ndarray) -> None: self._mem_cache[key] = value self._mem_cache.move_to_end(key) while len(self._mem_cache) > self.max_mem_entries: self._mem_cache.popitem(last=False) def _disk_load(self, key: str) -> Optional[np.ndarray]: path = self._entry_path(key) if not os.path.exists(path): return None try: return np.load(path) except Exception: return None def _disk_save(self, key: str, value: np.ndarray) -> None: os.makedirs(self._model_dir(), exist_ok=True) np.save(self._entry_path(key), value) self._index[key] = { "length": int(value.shape[0]), "dtype": str(value.dtype), "updated_at": time.time(), } def _maybe_use_session_cache(self, texts: list[str]): session = _ACTIVE_SESSION.get() if session is None: return None if 
session.last_prompt_texts == texts and session.last_prompt_embeddings is not None: return session.last_prompt_embeddings, session.last_prompt_lengths return None def _update_session_cache(self, texts: list[str], tensor: torch.Tensor, lengths: list[int]) -> None: session = _ACTIVE_SESSION.get() if session is None: return session.last_prompt_texts = texts session.last_prompt_embeddings = tensor session.last_prompt_lengths = lengths def get_or_encode(self, texts: Iterable[str], encoder): if isinstance(texts, str): texts = [texts] texts = sanitize_texts(list(texts)) if len(texts) == 0: empty = torch.empty(0) return empty, [] session_cache = self._maybe_use_session_cache(texts) if session_cache is not None: return session_cache arrays: list[Optional[np.ndarray]] = [None] * len(texts) lengths: list[int] = [0] * len(texts) misses: list[tuple[int, str, str]] = [] with self._lock: self._load_index() for idx, text in enumerate(texts): key = self._make_key(text) cached = self._mem_get(key) if cached is not None: arrays[idx] = cached lengths[idx] = cached.shape[0] self.stats.hits += 1 continue cached = self._disk_load(key) if cached is not None: arrays[idx] = cached lengths[idx] = cached.shape[0] self._mem_put(key, cached) self.stats.disk_hits += 1 continue misses.append((idx, text, key)) self.stats.misses += 1 if misses: miss_texts = [text for _, text, _ in misses] miss_tensor, miss_lengths = encoder(miss_texts) miss_tensor = miss_tensor.detach().cpu() miss_tensor_np = miss_tensor.numpy() with self._lock: self._load_index() for miss_idx, length in enumerate(miss_lengths): idx, _text, key = misses[miss_idx] arr = miss_tensor_np[miss_idx, :length].copy() arrays[idx] = arr lengths[idx] = int(length) self._mem_put(key, arr) self._disk_save(key, arr) self._save_index() max_len = max(lengths) if lengths else 0 feat_dim = arrays[0].shape[-1] if arrays[0] is not None else 0 dtype = arrays[0].dtype if arrays[0] is not None else np.float32 padded = np.zeros((len(texts), max_len, feat_dim), dtype=dtype) for idx, arr in enumerate(arrays): if arr is None: continue padded[idx, : arr.shape[0]] = arr result = torch.from_numpy(padded) self._update_session_cache(texts, result, lengths) return result, lengths class CachedTextEncoder: """Wrapper around a text encoder to add disk-backed caching.""" def __init__(self, encoder, *, model_name: str, base_dir: Optional[str] = None): self.encoder = encoder self.model_name = model_name encoder_id = f"{type(encoder).__name__}" self.cache = EmbeddingCache(model_name=model_name, encoder_id=encoder_id, base_dir=base_dir) def __call__(self, texts): return self.cache.get_or_encode(texts, self.encoder) def prewarm(self, texts) -> None: if isinstance(texts, str): texts = [texts] texts = sanitize_texts(list(texts)) prewarm_key = hashlib.sha256("|".join(texts).encode("utf-8")).hexdigest() if self.cache.has_prewarm_marker(prewarm_key): return self.cache.get_or_encode(texts, self.encoder) self.cache.write_prewarm_marker(prewarm_key, prompt_count=len(texts)) def to(self, device=None, dtype=None): if hasattr(self.encoder, "to"): self.encoder.to(device=device, dtype=dtype) return self @contextlib.contextmanager def session_context(self, session): token = _ACTIVE_SESSION.set(session) try: yield finally: _ACTIVE_SESSION.reset(token) def __getattr__(self, name): return getattr(self.encoder, name) ================================================ FILE: kimodo/demo/generation.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION &
AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from collections import defaultdict from typing import Optional import numpy as np import torch import viser from kimodo.constraints import ( TYPE_TO_CLASS, FullBodyConstraintSet, Root2DConstraintSet, ) from kimodo.exports.mujoco import apply_g1_real_robot_projection from kimodo.skeleton import G1Skeleton34, SOMASkeleton30 from kimodo.tools import seed_everything from .embedding_cache import CachedTextEncoder from .state import ClientSession, ModelBundle def compute_model_constraints_lst( session: ClientSession, model_bundle: ModelBundle, num_frames: int, device: str, ): """Compute the lst of constraints for the model based on the constraints in viser.""" assert len(session.motions) == 1, "Only one motion allowed for constrained generation" if not session.constraints: return [] model_skeleton = model_bundle.model.skeleton # For SOMA, UI uses somaskel77; extract 30-joint subset for the model use_skel_slice = isinstance(model_skeleton, SOMASkeleton30) and session.skeleton.nbjoints != model_skeleton.nbjoints skel_slice = model_skeleton.get_skel_slice(session.skeleton) if use_skel_slice else None dense_smooth_root_pos_2d = None if session.constraints["2D Root"].dense_path: # get the full 2d root dense_smooth_root_pos_2d = session.constraints["2D Root"].get_constraint_info(device=device)["root_pos"][ :, [0, 2] ] model_constraints = [] for track_name, constraint in session.constraints.items(): constraint_info = constraint.get_constraint_info(device=device) frame_idx = constraint_info["frame_idx"] # drop any constraints outside the generation range valid_info = [(i, fi) for i, fi in enumerate(frame_idx) if fi < num_frames] valid_idx = [i for i, _ in valid_info] valid_frame_idx = [fi for _, fi in valid_info] if len(valid_frame_idx) == 0: continue frame_indices = torch.tensor(valid_frame_idx) if track_name == "2D Root": smooth_root_pos_2d = constraint_info["root_pos"][valid_idx][:, [0, 2]].to(device) # same as "smooth_root_2d" model_constraints.append( Root2DConstraintSet( model_skeleton, frame_indices, smooth_root_pos_2d, ) ) elif track_name == "Full-Body": constraint_joints_pos = constraint_info["joints_pos"][valid_idx].to(device) constraint_joints_rot = constraint_info["joints_rot"][valid_idx].to(device) if skel_slice is not None: constraint_joints_pos = constraint_joints_pos[:, skel_slice] constraint_joints_rot = constraint_joints_rot[:, skel_slice] smooth_root_pos_2d = None if dense_smooth_root_pos_2d is not None: smooth_root_pos_2d = dense_smooth_root_pos_2d[frame_indices] model_constraints.append( FullBodyConstraintSet( model_skeleton, frame_indices, constraint_joints_pos, constraint_joints_rot, smooth_root_2d=smooth_root_pos_2d, ) ) elif track_name == "End-Effectors": constraint_joints_pos = constraint_info["joints_pos"][valid_idx].to(device) constraint_joints_rot = constraint_info["joints_rot"][valid_idx].to(device) if skel_slice is not None: constraint_joints_pos = constraint_joints_pos[:, skel_slice] constraint_joints_rot = constraint_joints_rot[:, skel_slice] end_effector_type_set_lst = [ end_effector_type_set for i, end_effector_type_set in enumerate(constraint_info["end_effector_type"]) if i in valid_idx ] # regroup the end effector data by type cls_idx = defaultdict(list) for idx, end_effector_type_set in enumerate(end_effector_type_set_lst): for end_effector_type in end_effector_type_set: cls_idx[TYPE_TO_CLASS[end_effector_type]].append(idx) for cls, lst_idx in cls_idx.items(): frame_indices_cls = 
frame_indices[lst_idx] smooth_root_pos_2d = None if dense_smooth_root_pos_2d is not None: smooth_root_pos_2d = dense_smooth_root_pos_2d[frame_indices_cls] constraint_joints_pos_el = constraint_joints_pos[lst_idx] constraint_joints_rot_el = constraint_joints_rot[lst_idx] model_constraints.append( cls( model_skeleton, frame_indices_cls, constraint_joints_pos_el, constraint_joints_rot_el, smooth_root_2d=smooth_root_pos_2d, ) ) else: raise ValueError(f"Unsupported constraint type: {constraint.display_name}") return model_constraints def generate( *, client: viser.ClientHandle, session: ClientSession, model_bundle: ModelBundle, prompts: list[str], num_frames: list[int], num_samples: int, seed: int, diffusion_steps: int, cfg_weight: Optional[list[float]] = None, cfg_type: Optional[str] = None, postprocess_parameters: Optional[dict] = None, transitions_parameters: Optional[dict] = None, real_robot_rotations: bool = False, device: str, clear_motions, add_character_motion, ) -> None: client_id = client.client_id print( f"Generating {num_samples} samples for a total of {sum(num_frames)} frames with those prompt: {prompts} (client {client_id})" ) seed_everything(seed) model_constraints = compute_model_constraints_lst(session, model_bundle, sum(num_frames), device) cfg_weight = cfg_weight or [2.0, 2.0] postprocess_parameters = postprocess_parameters or {} transitions_parameters = transitions_parameters or {} encoder = getattr(model_bundle.model, "text_encoder", None) if isinstance(encoder, CachedTextEncoder): with encoder.session_context(session): pred_joints_output = model_bundle.model( prompts, num_frames, diffusion_steps, multi_prompt=True, constraint_lst=model_constraints, cfg_weight=cfg_weight, num_samples=num_samples, cfg_type=cfg_type, **(postprocess_parameters | transitions_parameters), ) # [B, T, motion_rep_dim] else: pred_joints_output = model_bundle.model( prompts, num_frames, diffusion_steps, multi_prompt=True, constraint_lst=model_constraints, cfg_weight=cfg_weight, num_samples=num_samples, cfg_type=cfg_type, **(postprocess_parameters | transitions_parameters), ) # [B, T, motion_rep_dim] joints_pos = pred_joints_output["posed_joints"] # [B, T, J, 3] joints_rot = pred_joints_output["global_rot_mats"] foot_contacts = pred_joints_output.get("foot_contacts") # Optionally project G1 to real robot DoF (1-DoF per joint, clamped) for display. if real_robot_rotations and isinstance(session.skeleton, G1Skeleton34): joints_pos, joints_rot = apply_g1_real_robot_projection( session.skeleton, pred_joints_output["posed_joints"], pred_joints_output["global_rot_mats"], clamp_to_limits=True, ) # Display on characters (callbacks keep this module UI-agnostic). clear_motions(client_id) # Keep one sample centered at the origin so constraints align. spread_factor = 1.0 # meters center_idx = num_samples // 2 x_trans = (np.arange(num_samples) - center_idx) * spread_factor for i in range(num_samples): cur_joints_pos = joints_pos[i] cur_joints_pos[..., 0] += x_trans[i] add_character_motion( client, session.skeleton, cur_joints_pos, joints_rot[i], foot_contacts[i], ) ================================================ FILE: kimodo/demo/queue_manager.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """HF mode user queue and session time limit.""" import math import threading import time from collections.abc import Callable from typing import Any import viser from .config import DEMO_UI_QUICK_START_MODAL_MD, MAX_SESSION_MINUTES # Link for "Duplicate this Space" on Hugging Face (used in queue and expiry modals). DUPLICATE_SPACE_URL = "https://huggingface.co/spaces/nvidia/Kimodo?duplicate=true" GITHUB_REPO_URL = "https://github.com/nv-tlabs/kimodo" # How often to refresh queue modal content (position, total, estimated wait). QUEUE_MODAL_REFRESH_INTERVAL_SEC = 15 class UserQueue: """Thread-safe queue: active users (with activation timestamp) and waiting queue.""" def __init__(self, max_active: int, max_minutes: float) -> None: self._max_active = max_active self._max_minutes = max_minutes self._max_seconds = max_minutes * 60.0 self._active: dict[int, float] = {} # client_id -> activation timestamp self._queued: list[int] = [] self._lock = threading.Lock() def try_activate(self, client_id: int) -> bool: """If a slot is free, add client as active and return True. Else return False. """ with self._lock: if len(self._active) < self._max_active: self._active[client_id] = time.time() return True return False def enqueue(self, client_id: int) -> None: with self._lock: if client_id not in self._queued: self._queued.append(client_id) def remove(self, client_id: int) -> bool: """Remove from active or queue. Returns True if was active. """ with self._lock: was_active = client_id in self._active self._active.pop(client_id, None) if client_id in self._queued: self._queued.remove(client_id) return was_active def promote_next(self) -> int | None: """If queue non-empty, pop first, activate them, return their client_id. Else None. 
""" with self._lock: if not self._queued: return None client_id = self._queued.pop(0) self._active[client_id] = time.time() return client_id def get_queue_position(self, client_id: int) -> tuple[int, int] | None: """(1-based position, total_in_queue) or None if not queued.""" with self._lock: if client_id not in self._queued: return None pos = self._queued.index(client_id) return (pos + 1, len(self._queued)) def get_estimated_wait_seconds(self, client_id: int) -> float: """Estimated seconds until this queued client gets a slot.""" with self._lock: if client_id not in self._queued: return 0.0 pos = self._queued.index(client_id) + 1 # 1-based # Expiry times of active users (when they free a slot) now = time.time() expiries = sorted(now + self._max_seconds - (now - t) for t in self._active.values()) if not expiries: return 0.0 # Nth slot to free (1-indexed) wraps over expiries idx = (pos - 1) % len(expiries) cycles = (pos - 1) // len(expiries) slot_free_time = expiries[idx] + cycles * self._max_seconds return max(0.0, slot_free_time - now) def is_active(self, client_id: int) -> bool: with self._lock: return client_id in self._active def was_active(self, client_id: int) -> bool: """True if client is currently active (for use when already holding lock).""" return client_id in self._active def _format_wait(seconds: float) -> str: if seconds < 60: return "less than a minute" mins = int(math.ceil(seconds / 60)) return f"~{mins} minute{'s' if mins != 1 else ''}" def _queue_modal_markdown(position: int, total: int, estimated_wait_sec: float) -> str: wait_str = _format_wait(estimated_wait_sec) mins = int(MAX_SESSION_MINUTES) if MAX_SESSION_MINUTES == int(MAX_SESSION_MINUTES) else MAX_SESSION_MINUTES return f"""## Kimodo Demo — Please Wait This demo runs with limited capacity. Each user gets **{mins} minute{"s" if mins != 1 else ""}** of interactive time. **Your position in queue:** {position} / {total} **Estimated wait:** {wait_str} Please keep this tab open — the demo will start automatically when it's your turn. --- *Want unlimited access? [Duplicate this Space]({DUPLICATE_SPACE_URL}) or clone the [GitHub repo]({GITHUB_REPO_URL}) to run locally!* """ def _welcome_modal_markdown() -> str: mins = int(MAX_SESSION_MINUTES) if MAX_SESSION_MINUTES == int(MAX_SESSION_MINUTES) else MAX_SESSION_MINUTES return f"""## Welcome to Kimodo Demo You have been granted a **{mins}-minute** demo session. Your session timer has started. Click the button below to begin! """ def _expiry_modal_markdown() -> str: mins = int(MAX_SESSION_MINUTES) if MAX_SESSION_MINUTES == int(MAX_SESSION_MINUTES) else MAX_SESSION_MINUTES return f"""## Session Expired Your {mins}-minute demo session has ended. Thank you for trying Kimodo! Refresh this page to rejoin the queue, or [duplicate this Space]({DUPLICATE_SPACE_URL}) for unlimited access. 
""" class QueueManager: """Orchestrates HF mode: queue modals, welcome modal, session timer, promotion.""" def __init__( self, queue: UserQueue, server: viser.ViserServer, setup_demo_for_client: Callable[[viser.ClientHandle], None], cleanup_session: Callable[[int], None], ) -> None: self._queue = queue self._server = server self._setup_demo_for_client = setup_demo_for_client self._cleanup_session = cleanup_session self._max_seconds = queue._max_seconds self._queue_modal_handles: dict[int, tuple[Any, Any]] = {} self._welcome_modal_handles: dict[int, Any] = {} self._expiry_timers: dict[int, threading.Timer] = {} self._lock = threading.Lock() self._refresh_stop = threading.Event() self._refresh_thread = threading.Thread( target=self._queue_modal_refresh_loop, name="queue-modal-refresh", daemon=True, ) self._refresh_thread.start() def _queue_modal_refresh_loop(self) -> None: """Periodically refresh queue modals so position, total, and estimated wait stay current.""" while not self._refresh_stop.wait(timeout=QUEUE_MODAL_REFRESH_INTERVAL_SEC): self._update_all_queue_modals() def on_client_connect(self, client: viser.ClientHandle) -> None: """Handle new connection: activate if slot free, else enqueue and show queue modal.""" client_id = client.client_id if self._queue.try_activate(client_id): try: self._setup_demo_for_client(client) except RuntimeError as e: if "CUDA error" in str(e): print(f"CUDA error while setting up client {client_id}: {e}") return raise self._start_session_timer(client_id) self._show_welcome_modal(client) else: self._queue.enqueue(client_id) self._show_queue_modal(client) self._update_all_queue_modals() def on_client_disconnect(self, client_id: int) -> None: """Remove from queue/active, cancel timer, promote next if was active. Session/scene cleanup is done by the demo's on_client_disconnect. 
""" with self._lock: self._expiry_timers.pop(client_id, None) self._queue_modal_handles.pop(client_id, None) self._welcome_modal_handles.pop(client_id, None) was_active = self._queue.remove(client_id) if was_active: self._promote_next_user() else: self._update_all_queue_modals() def _show_queue_modal(self, client: viser.ClientHandle) -> None: client_id = client.client_id pos, total = self._queue.get_queue_position(client_id) or (0, 0) wait_sec = self._queue.get_estimated_wait_seconds(client_id) md_content = _queue_modal_markdown(pos, total, wait_sec) modal = client.gui.add_modal( "Kimodo Demo — Please Wait", size="xl", show_close_button=False, ) with modal: md_handle = client.gui.add_markdown(md_content) with self._lock: self._queue_modal_handles[client_id] = (modal, md_handle) def _show_quick_start_modal(self, client: viser.ClientHandle) -> None: """Show the quick start instructions modal (same as non-HF mode).""" with client.gui.add_modal( "Welcome — Quick Start", size="xl", show_close_button=True, save_choice="kimodo.demo.quick_start_ack", ) as quick_start_modal: client.gui.add_markdown(DEMO_UI_QUICK_START_MODAL_MD) client.gui.add_button("Got it (don't remind me again)").on_click(lambda _: quick_start_modal.close()) def _show_welcome_modal(self, client: viser.ClientHandle) -> None: client_id = client.client_id def _on_start_demo(_: Any) -> None: modal.close() self._show_quick_start_modal(client) modal = client.gui.add_modal( "Welcome to Kimodo Demo", size="xl", show_close_button=True, ) with modal: client.gui.add_markdown(_welcome_modal_markdown()) client.gui.add_button("Start Demo").on_click(_on_start_demo) with self._lock: self._welcome_modal_handles[client_id] = modal def _update_all_queue_modals(self) -> None: with self._lock: handles = list(self._queue_modal_handles.items()) for client_id, (modal, md_handle) in handles: pos_total = self._queue.get_queue_position(client_id) if pos_total is None: continue pos, total = pos_total wait_sec = self._queue.get_estimated_wait_seconds(client_id) try: md_handle.content = _queue_modal_markdown(pos, total, wait_sec) except Exception: pass def _promote_next_user(self) -> None: promoted_id = self._queue.promote_next() if promoted_id is None: return clients = self._server.get_clients() client = clients.get(promoted_id) if client is None: return with self._lock: old = self._queue_modal_handles.pop(promoted_id, None) if old is not None: try: old[0].close() except Exception: pass try: self._setup_demo_for_client(client) except RuntimeError as e: if "CUDA error" in str(e): print(f"CUDA error while setting up client {promoted_id}: {e}") return raise self._start_session_timer(promoted_id) self._show_welcome_modal(client) self._update_all_queue_modals() def _start_session_timer(self, client_id: int) -> None: def on_expiry() -> None: self._on_session_expired(client_id) t = threading.Timer(self._max_seconds, on_expiry) t.daemon = True with self._lock: self._expiry_timers[client_id] = t t.start() def _on_session_expired(self, client_id: int) -> None: with self._lock: self._expiry_timers.pop(client_id, None) if not self._queue.is_active(client_id): return self._queue.remove(client_id) clients = self._server.get_clients() client = clients.get(client_id) if client is not None: try: with client.gui.add_modal( "Session Expired", size="lg", show_close_button=False, ) as modal_ctx: client.gui.add_markdown(_expiry_modal_markdown()) except Exception: pass self._cleanup_session(client_id) self._promote_next_user() ================================================ 
FILE: kimodo/demo/state.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from dataclasses import dataclass, field from typing import Optional import torch import kimodo.viz.viser_utils as viser_utils import viser from kimodo.skeleton import SkeletonBase from kimodo.viz.viser_utils import GuiElements from .config import ( DEFAULT_CUR_DURATION, DEFAULT_MODEL, DEFAULT_PLAYBACK_SPEED, ) @dataclass(frozen=True) class ModelBundle: model: object motion_rep: object skeleton: SkeletonBase model_fps: float @dataclass class ClientSession: """Per-client session data.""" client: viser.ClientHandle gui_elements: GuiElements motions: dict # character_name -> CharacterMotion constraints: dict[str, viser_utils.ConstraintSet] = field(default_factory=dict) timeline_data: object = None frame_idx: int = 0 playing: bool = False playback_speed: float = DEFAULT_PLAYBACK_SPEED cur_duration: float = DEFAULT_CUR_DURATION max_frame_idx: int = 100 # will be updated based on model_fps updating_motions: bool = False edit_mode: bool = False model_name: str = DEFAULT_MODEL model_fps: float = 0.0 skeleton: SkeletonBase | None = None motion_rep: object | None = None examples_base_dir: str = "" example_dict: dict[str, str] = field(default_factory=dict) gui_examples_dropdown: Optional[viser.GuiInputHandle] = None gui_save_example_path_text: Optional[viser.GuiInputHandle] = None gui_model_selector: Optional[viser.GuiInputHandle] = None last_prompt_texts: Optional[list[str]] = None last_prompt_embeddings: Optional[torch.Tensor] = None last_prompt_lengths: Optional[list[int]] = None edit_mode_snapshot: Optional[dict[int, dict[str, object]]] = None undo_drag_snapshot: Optional[dict[str, object]] = None show_only_current_constraint: bool = False # False = Show All, True = Show only Current ================================================ FILE: kimodo/demo/ui.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # ruff: noqa: I001 import math import os import threading from typing import Optional from kimodo.constraints import load_constraints_lst, save_constraints_lst from kimodo.exports.bvh import motion_to_bvh_bytes, save_motion_bvh from kimodo.exports.motion_io import ( amass_npz_to_bytes, g1_csv_to_bytes, kimodo_npz_to_bytes, load_motion_file, save_kimodo_npz, ) from kimodo.model.registry import kimodo_short_key_for_skeleton_dataset, registry_skeleton_for_joint_count from kimodo.tools import to_torch from kimodo.viz import viser_utils from kimodo.viz.viser_utils import GuiElements import numpy as np import torch import viser from viser._timeline_api import PROMPT_COLORS from . 
import generation from .config import ( DEFAULT_CUR_DURATION, DEMO_UI_INSTRUCTIONS_TAB_MD, get_datasets, get_model_info, get_models_for_dataset_skeleton, get_skeleton_display_name, get_skeleton_display_names_for_dataset, get_skeleton_key_from_display_name, get_short_key_from_display_name, HF_MODE, INIT_POSTPROCESSING, MODEL_NAMES, NB_TRANSITION_FRAMES, SHOW_TRANSITION_PARAMS, ) from .state import ClientSession from kimodo.skeleton import G1Skeleton34, SOMASkeleton30, SOMASkeleton77 def extract_intervals_and_singles(t: torch.Tensor): intervals = [] intervals_indices = [] single_frames = [] single_frames_indices = [] start_idx = 0 for i in range(1, len(t) + 1): # End of run if: # - end of tensor # - non-consecutive value if i == len(t) or t[i] != t[i - 1] + 1: run_length = i - start_idx if run_length >= 2: intervals.append((int(t[start_idx]), int(t[i - 1]))) intervals_indices.append((start_idx, i - 1)) else: single_frames.append(int(t[start_idx])) single_frames_indices.append(start_idx) start_idx = i return intervals, intervals_indices, single_frames, single_frames_indices def create_gui( demo, client: viser.ClientHandle, model_name: str, model_fps: float, ): """Create GUI elements for a specific client.""" client_id = client.client_id def get_active_session(event_client: viser.ClientHandle | None): if event_client is None: return None if not demo.client_active(event_client.client_id): return None return demo.client_sessions[event_client.client_id] def build_timeline_tracks(): timeline = client.timeline demo.set_timeline_defaults(timeline, model_fps) timeline.set_visible(True) timeline.set_current_frame(0) timeline_tracks = {} fullbody_id = timeline.add_track( "Full-Body", track_type="keyframe", color=(219, 148, 86), height_scale=0.5, ) timeline_tracks[fullbody_id] = { "name": "Full-Body", "track_type": "keyframe", "color": (219, 148, 86), "height_scale": 0.5, } root2d_id = timeline.add_track( "2D Root", track_type="keyframe", color=(150, 100, 200), height_scale=0.5, ) timeline_tracks[root2d_id] = { "name": "2D Root", "track_type": "keyframe", "color": (150, 100, 200), "height_scale": 0.5, } lefthand_id = timeline.add_track( "Left Hand", track_type="keyframe", color=(100, 200, 150), height_scale=0.5, ) timeline_tracks[lefthand_id] = { "name": "Left Hand", "track_type": "keyframe", "color": (100, 200, 150), "height_scale": 0.5, } righthand_id = timeline.add_track( "Right Hand", track_type="keyframe", color=(200, 100, 150), height_scale=0.5, ) timeline_tracks[righthand_id] = { "name": "Right Hand", "track_type": "keyframe", "color": (200, 100, 150), "height_scale": 0.5, } leftfoot_id = timeline.add_track( "Left Foot", track_type="keyframe", color=(219, 148, 86), height_scale=0.5, ) timeline_tracks[leftfoot_id] = { "name": "Left Foot", "track_type": "keyframe", "color": (219, 148, 86), "height_scale": 0.5, } rightfoot_id = timeline.add_track( "Right Foot", track_type="keyframe", color=(150, 100, 200), height_scale=0.5, ) timeline_tracks[rightfoot_id] = { "name": "Right Foot", "track_type": "keyframe", "color": (150, 100, 200), "height_scale": 0.5, } return timeline, timeline_tracks timeline, timeline_tracks = build_timeline_tracks() # These handles are part of GuiElements, but the demo currently uses timeline + buttons # embedded in the Viser UI instead of custom controls. 
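# For reference, extract_intervals_and_singles (defined above) splits a sorted frame
# tensor into consecutive runs and isolated frames, e.g. torch.tensor([2, 3, 4, 9]) ->
# intervals=[(2, 4)], intervals_indices=[(0, 2)], single_frames=[9], single_frames_indices=[3].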
gui_play_pause_button = None gui_next_frame_button = None gui_prev_frame_button = None gui_timeline = None gui_duration_slider = None # now other gui elements tab_group = client.gui.add_tab_group() # # Playback and Motion generation controls # with tab_group.add_tab("Generate", viser.Icon.WALK): with client.gui.add_folder("Model Selection", expand_by_default=True): info = get_model_info(model_name) if info is None: info = get_model_info(next(iter(MODEL_NAMES))) def get_allowed_skeleton_labels(dataset_ui_label: str) -> list[str]: labels = get_skeleton_display_names_for_dataset(dataset_ui_label, family="Kimodo") if HF_MODE: labels = [label for label in labels if get_skeleton_key_from_display_name(label) != "SMPLX"] return labels dataset_ui_label = "Rigplay" if HF_MODE else info.dataset_ui_label datasets = ["Rigplay"] if HF_MODE else get_datasets(family="Kimodo") skeleton_labels = get_allowed_skeleton_labels(dataset_ui_label) initial_skeleton_label = get_skeleton_display_name(info.skeleton) if initial_skeleton_label not in skeleton_labels and skeleton_labels: initial_skeleton_label = skeleton_labels[0] initial_skeleton_key = ( get_skeleton_key_from_display_name(initial_skeleton_label) if skeleton_labels else None ) models_for_pair = ( get_models_for_dataset_skeleton(dataset_ui_label, initial_skeleton_key, family="Kimodo") if initial_skeleton_key is not None else [] ) version_options = [m.display_name for m in models_for_pair] initial_version = ( info.display_name if info.display_name in version_options else (version_options[0] if version_options else "") ) gui_dataset_selector = client.gui.add_dropdown( "Training dataset", options=datasets, initial_value=dataset_ui_label, visible=not HF_MODE, ) gui_skeleton_selector = client.gui.add_dropdown( "Model" if HF_MODE else "Skeleton", options=skeleton_labels, initial_value=initial_skeleton_label, ) gui_version_selector = client.gui.add_dropdown( "Version", options=version_options, initial_value=initial_version, ) gui_version_selector.visible = len(models_for_pair) > 1 gui_model_display = client.gui.add_markdown( content=f"**Model:** {initial_version}", ) gui_load_model_button = client.gui.add_button( "Load model", hint="Load the selected model (dataset, skeleton, version).", ) class ModelSelectorHandle: """Wrapper so session and callbacks can treat three dropdowns as one.""" def __init__(self): self._dataset = gui_dataset_selector self._skeleton = gui_skeleton_selector self._version = gui_version_selector self._display = gui_model_display @property def value(self) -> str: return get_short_key_from_display_name(self._version.value) or "" def set_from_short_key(self, short_key: str) -> None: info = get_model_info(short_key) if info is None: return dataset_ui_label = "Rigplay" if HF_MODE else info.dataset_ui_label self._dataset.value = dataset_ui_label self._skeleton.options = get_allowed_skeleton_labels(dataset_ui_label) skeleton_label = get_skeleton_display_name(info.skeleton) if skeleton_label not in self._skeleton.options and self._skeleton.options: skeleton_label = self._skeleton.options[0] self._skeleton.value = skeleton_label skeleton_key = get_skeleton_key_from_display_name(skeleton_label) if skeleton_key is None: return models = get_models_for_dataset_skeleton(dataset_ui_label, skeleton_key, family="Kimodo") self._version.options = [m.display_name for m in models] self._version.value = ( info.display_name if info.display_name in self._version.options else self._version.options[0] ) self._version.visible = len(models) > 1 self._display.content 
= f"**Model:** {self._version.value}" gui_model_selector = ModelSelectorHandle() with client.gui.add_folder("Examples", expand_by_default=True): examples_base_dir = demo.get_examples_base_dir(model_name, absolute=True) example_dict = viser_utils.load_example_cases(examples_base_dir) example_names = list(example_dict.keys()) if not example_names: example_names = [""] gui_examples_dropdown = client.gui.add_dropdown( "Example", options=example_names, initial_value=example_names[0], ) gui_load_example_button = client.gui.add_button( "Load Example", hint="Load the selected example.", disabled=not example_dict, ) def update_examples_dropdown( new_example_dict: dict[str, str], keep_selection: bool = True, ) -> None: if not new_example_dict: gui_examples_dropdown.options = [""] gui_examples_dropdown.value = "" gui_load_example_button.disabled = True return gui_load_example_button.disabled = False example_names_local = list(new_example_dict.keys()) gui_examples_dropdown.options = example_names_local if keep_selection and gui_examples_dropdown.value in example_names_local: return gui_examples_dropdown.value = example_names_local[0] with client.gui.add_folder("Generate", expand_by_default=True): gui_duration = client.gui.add_markdown(content=f"Total duration: {DEFAULT_CUR_DURATION:.1f} (sec)") def update_duration_gui(duration): gui_duration.content = f"Total duration: {duration:.1f} (sec)" def compute_prompt_num_frames(prompt_values): """Convert timeline prompt bounds to per-prompt frame counts. Convention in this demo: - All prompts except the last are treated as [start_frame, end_frame) (end is exclusive). - The last prompt is treated as [start_frame, end_frame] (end is inclusive). - This assumes the prompts values are sorted by start_frame. """ if len(prompt_values) == 0: return [] num_frames = [] for i, x in enumerate(prompt_values): cur = x.end_frame - x.start_frame if i == len(prompt_values) - 1: cur += 1 num_frames.append(cur) return num_frames def update_duration_auto(): session = demo.client_sessions[client_id] prompt_values = sorted( [x for x in timeline._prompts.values()], key=lambda x: x.start_frame, ) num_frames = compute_prompt_num_frames(prompt_values) total_nb_frames = sum(num_frames) cur_duration = total_nb_frames / session.model_fps set_new_duration(client_id, cur_duration) update_duration_gui(cur_duration) gui_num_samples_slider = client.gui.add_slider( "Num Samples", min=1, max=10, step=1, initial_value=1, visible=not HF_MODE, ) gui_use_soma_layer_checkbox = client.gui.add_checkbox( "SOMA layer", initial_value=False, visible="soma" in (model_name or ""), ) with client.gui.add_folder("Model Parameters", expand_by_default=False): gui_seed = client.gui.add_number("Seed", initial_value=42) with client.gui.add_folder("Diffusion", expand_by_default=False): gui_diffusion_steps_slider = client.gui.add_slider( "Denoising Steps", min=2, max=1000, step=10, initial_value=100, ) with client.gui.add_folder("Classifier-Free Guidance", expand_by_default=False): gui_cfg_checkbox = client.gui.add_checkbox( "Enable", initial_value=True, visible=True, ) gui_cfg_text_weight_slider = client.gui.add_slider( "Text Weight", min=0.0, max=5.0, step=0.1, initial_value=2.0, visible=True, ) gui_cfg_constraint_weight_slider = client.gui.add_slider( "Constraint Weight", min=0.0, max=5.0, step=0.1, initial_value=2.0, visible=True, ) with client.gui.add_folder( "Transitions", expand_by_default=False, visible=SHOW_TRANSITION_PARAMS, ): gui_num_transition_frames_slider = client.gui.add_slider( "Transition frames", 
min=1, max=10, step=1, initial_value=NB_TRANSITION_FRAMES, visible=True, ) with client.gui.add_folder("Post Processing", expand_by_default=False): _model_name = model_name or "" _postprocess_visible = "g1" not in _model_name gui_postprocess_checkbox = client.gui.add_checkbox( "Enable", initial_value=INIT_POSTPROCESSING, hint="Apply motion post-processing (not available for G1)", visible=_postprocess_visible, ) gui_root_margin = client.gui.add_number( "Root Margin", min=0.0, # max=0.5, step=0.01, initial_value=0.04, hint="Margin for root position (meters). Lower values pin root closer to target.", visible=INIT_POSTPROCESSING and _postprocess_visible, ) @gui_postprocess_checkbox.on_update def _(event: viser.GuiEvent) -> None: if get_active_session(event.client) is None: return # disable the slider if sharing transition is False gui_root_margin.visible = gui_postprocess_checkbox.value gui_real_robot_rotations_checkbox = client.gui.add_checkbox( "Real robot rotations", initial_value=False, hint="Project joint rotations to G1 real robot DoF (1-DoF per joint) and clamp to axis limits from the MuJoCo XML.", visible="g1" in _model_name, ) gui_generate_button = client.gui.add_button("Generate", color="green") with client.gui.add_folder("Constraints", expand_by_default=False): gui_gizmo_space_dropdown = client.gui.add_dropdown( "Gizmo space", ("Local", "World"), initial_value="Local", visible="g1" not in _model_name, ) gui_edit_constraint_button = client.gui.add_button("Enter Editing Mode") gui_snap_to_constraint_button = client.gui.add_button( "Snap to Constraint", disabled=True, ) gui_reset_constraint_button = client.gui.add_button( "Reset Constraint", disabled=True, ) gui_undo_drag_button = client.gui.add_button( "Undo Move", disabled=True, ) with client.gui.add_folder("Root 2D Options", expand_by_default=True): gui_dense_path_checkbox = client.gui.add_checkbox( "Make Smooth Path", initial_value=False, visible=True, ) gui_show_only_current_constraint_checkbox = client.gui.add_checkbox( "Show only Current", initial_value=False, hint="Show only constraint overlays at the current frame; uncheck to show all.", ) def apply_constraint_overlay_visibility(session: ClientSession) -> None: demo._apply_constraint_overlay_visibility(session) @gui_show_only_current_constraint_checkbox.on_update def _(event: viser.GuiEvent) -> None: session = get_active_session(event.client) if session is None: return session.show_only_current_constraint = gui_show_only_current_constraint_checkbox.value apply_constraint_overlay_visibility(session) gui_clear_all_constraints_button = client.gui.add_button( "Clear All Constraints", color="red", ) def has_constraint_at_frame(session: ClientSession, frame_idx: int) -> bool: for constraint_name in ["Full-Body", "End-Effectors", "2D Root"]: constraint = session.constraints.get(constraint_name) if constraint is None: continue if frame_idx in constraint.keyframes: return True return False def update_snap_to_constraint_button(session: ClientSession) -> None: gui_snap_to_constraint_button.disabled = not has_constraint_at_frame(session, session.frame_idx) def ensure_edit_snapshot(session: ClientSession, motion, frame_idx: int) -> None: if session.edit_mode_snapshot is None: session.edit_mode_snapshot = {} if frame_idx in session.edit_mode_snapshot: return session.edit_mode_snapshot[frame_idx] = { "joints_pos": motion.get_joints_pos(frame_idx), "joints_rot": motion.get_joints_rot(frame_idx), } def _update_dense_path(motion, session): constraint_info = session.constraints["2D 
Root"].get_constraint_info() if len(constraint_info["frame_idx"]) > 0: min_root_frame = min(constraint_info["frame_idx"]) max_root_frame = max(constraint_info["frame_idx"]) motion.set_projected_root_pos_path( constraint_info["root_pos"][:, [0, 2]], min_frame_idx=min_root_frame, max_frame_idx=max_root_frame, ) # Delay (ms) after last keyframe/interval move before updating path = "on release". DENSE_PATH_AFTER_RELEASE_MS = 300 def _schedule_dense_path_after_release(session): """Schedule a single path update to run after user stops dragging.""" if "2D Root" not in session.constraints or not session.constraints["2D Root"].dense_path: return tdata = session.timeline_data if tdata.get("dense_path_after_release_timer"): tdata["dense_path_after_release_timer"].cancel() delay = DENSE_PATH_AFTER_RELEASE_MS / 1000.0 def run(): if not demo.client_active(client_id): return sess = demo.client_sessions[client_id] tdata["dense_path_after_release_timer"] = None if "2D Root" not in sess.constraints or not sess.constraints["2D Root"].dense_path: return mot = list(sess.motions.values())[0] _update_dense_path(mot, sess) t = threading.Timer(delay, run) tdata["dense_path_after_release_timer"] = t t.start() @gui_dense_path_checkbox.on_update def _(event: viser.GuiEvent) -> None: session = get_active_session(event.client) if session is None: return if gui_dense_path_checkbox.value: # Make sure 0 and max_frame_idx keyframes are added to the constraint # since dense path should cover full duration for best model performance root_2d_track = session.timeline_data["tracks_ids"]["2D Root"] # add a locked keyframe at 0 start_keyframe_id = client.timeline.add_locked_keyframe( # noqa root_2d_track, 0, opacity=0.0, ) session.timeline_data["keyframes"][start_keyframe_id] = { "frame": 0, "track_id": root_2d_track, "locked": True, "opacity": 0.0, "value": None, } add_constraint_callback( start_keyframe_id, "2D Root", (0, 0), verbose=False, ) # add a locked keyframe at max_frame_idx end_keyframe_id = client.timeline.add_locked_keyframe( root_2d_track, session.max_frame_idx, opacity=0.0, ) session.timeline_data["keyframes"][end_keyframe_id] = { "frame": session.max_frame_idx, "track_id": root_2d_track, "locked": True, "opacity": 0.0, "value": None, } add_constraint_callback( end_keyframe_id, "2D Root", (session.max_frame_idx, session.max_frame_idx), verbose=False, ) # add a locked interval only for visual purposes locked_interval = client.timeline.add_locked_interval( # noqa root_2d_track, start_frame=0, end_frame=session.max_frame_idx, ) session.timeline_data["intervals"][locked_interval] = { "track_id": root_2d_track, "start_frame_idx": 0, "end_frame_idx": session.max_frame_idx, "locked": True, "opacity": 0.3, "value": None, } session.constraints["2D Root"].set_dense_path(gui_dense_path_checkbox.value) if session.constraints["2D Root"].dense_path: # update the character motion to reflect the full path # will be full length by construction, no need to specify min/max frame idx motion = list(session.motions.values())[0] _update_dense_path(motion, session) # remove locked interval and locked keyframes if not gui_dense_path_checkbox.value: # Get all locked keyframes keyframes_to_remove = [] for uuid, keyframe in client.timeline._keyframes.items(): if keyframe.locked: keyframes_to_remove.append(uuid) _data = session.timeline_data["keyframes"][uuid] remove_constraint_callback( uuid, constraint_type=session.timeline_data["tracks"][_data["track_id"]]["name"], frame_range=(_data["frame"], _data["frame"]), verbose=False, ) 
intervals_to_remove = [] # remove all locked intervals for uuid, interval in client.timeline._intervals.items(): if interval.locked: intervals_to_remove.append(uuid) # removing keyframes and intervals for uuid in keyframes_to_remove: client.timeline.remove_keyframe(uuid) for uuid in intervals_to_remove: client.timeline.remove_interval(uuid) apply_constraint_overlay_visibility(session) with client.gui.add_folder( "Load/Save", expand_by_default=False, visible=not HF_MODE, ): with client.gui.add_folder("Motion", expand_by_default=False): gui_save_motion_path_text = client.gui.add_text("Save Path", initial_value="output") gui_save_motion_format_dropdown = client.gui.add_dropdown( "Save Format", options=( ["NPZ", "CSV"] if "g1" in model_name.lower() else ["NPZ", "AMASS NPZ"] if "smplx" in model_name.lower() else ["NPZ", "BVH"] ), initial_value="NPZ", ) gui_save_bvh_standard_tpose_checkbox = client.gui.add_checkbox( "Standard T-pose", initial_value=False, hint="For BVH export, use the standard T-pose rest skeleton.", visible=False, ) gui_save_motion_button = client.gui.add_button( "Save Motion", hint="Save the current motion (format + path above)", ) gui_load_motion_path_text = client.gui.add_text( "Load Path", initial_value="output.npz", hint="SOMA .bvh, Kimodo or AMASS .npz, or G1 MuJoCo .csv", ) gui_load_motion_button = client.gui.add_button( "Load Motion", hint="Load the selected motion", ) with client.gui.add_folder("Constraints", expand_by_default=False): gui_save_constraints_path_text = client.gui.add_text( "Save Path", initial_value="output_constraints.json" ) gui_save_constraints_button = client.gui.add_button("Save Constraints") gui_load_constraints_path_text = client.gui.add_text( "Load Path", initial_value="output_constraints.json" ) gui_load_constraints_button = client.gui.add_button("Load Constraints") with client.gui.add_folder("Example", expand_by_default=False): gui_save_example_path_text = client.gui.add_text( "Save Dir", initial_value=os.path.join( demo.get_examples_base_dir(model_name, absolute=True), "custom_example_1", ), ) gui_save_example_button = client.gui.add_button("Save Example") gui_load_example_path_text = client.gui.add_text( "Load Dir", initial_value=os.path.join( demo.get_examples_base_dir(model_name, absolute=True), "custom_example_1", ), ) gui_load_gt_checkbox = client.gui.add_checkbox( "Load GT instead", initial_value=False, ) gui_load_example_from_path_button = client.gui.add_button("Load Example") def _get_primary_motion(session: ClientSession): return list(session.motions.values())[0] def _motion_to_numpy_dict(motion) -> dict[str, np.ndarray]: joints_pos = motion.joints_pos.detach().cpu().numpy() joints_rot = motion.joints_rot.detach().cpu().numpy() joints_local_rot = motion.joints_local_rot.detach().cpu().numpy() if joints_pos.ndim != 3: raise ValueError(f"Expected unbatched joints_pos with shape [T, J, 3], got {joints_pos.shape}") if joints_rot.ndim != 4: raise ValueError(f"Expected unbatched joints_rot with shape [T, J, 3, 3], got {joints_rot.shape}") if joints_local_rot.ndim != 4: raise ValueError( "Expected unbatched joints_local_rot with shape " f"[T, J, 3, 3], got {joints_local_rot.shape}" ) motion_data = { "posed_joints": joints_pos, "global_rot_mats": joints_rot, "local_rot_mats": joints_local_rot, "root_positions": joints_pos[:, motion.skeleton.root_idx, :], } if motion.foot_contacts is not None: foot_contacts = motion.foot_contacts.detach().cpu().numpy() if foot_contacts.ndim != 2: raise ValueError( f"Expected unbatched foot_contacts with shape 

def _coerce_save_path(raw_path: str, *, ext: str) -> str:
    """Ensure the save path ends with the correct extension for the chosen format."""
    name = (raw_path or "").strip()
    if name == "":
        return f"output{ext}"
    known_exts = (".npz", ".bvh", ".csv")
    if name.lower().endswith(known_exts):
        return os.path.splitext(name)[0] + ext
    if os.path.splitext(name)[1] == "":
        return name + ext
    return name

def save_motion(client, save_path, fmt):
    session = demo.client_sessions[client.client_id]
    motion = _get_primary_motion(session)
    motion_data = _motion_to_numpy_dict(motion)
    if fmt == "BVH":
        save_path = _coerce_save_path(save_path, ext=".bvh")
        save_motion_bvh(
            save_path,
            motion.joints_local_rot,
            motion.joints_pos[:, session.skeleton.root_idx, :],
            skeleton=session.skeleton,
            fps=float(session.model_fps),
            standard_tpose=bool(gui_save_bvh_standard_tpose_checkbox.value),
        )
    elif fmt == "CSV":
        save_path = _coerce_save_path(save_path, ext=".csv")
        data = g1_csv_to_bytes(motion_data, session.skeleton, demo.device)
        with open(save_path, "wb") as f:
            f.write(data)
    elif fmt == "AMASS NPZ":
        save_path = _coerce_save_path(save_path, ext=".npz")
        data = amass_npz_to_bytes(motion_data, session.skeleton, session.model_fps)
        with open(save_path, "wb") as f:
            f.write(data)
    else:
        save_path = _coerce_save_path(save_path, ext=".npz")
        save_kimodo_npz(save_path, motion_data)
    return save_path

@gui_save_motion_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    if get_active_session(event_client) is None:
        return
    raw_path = gui_save_motion_path_text.value
    fmt = str(gui_save_motion_format_dropdown.value).upper()
    try:
        saved_path = save_motion(event_client, raw_path, fmt)
        event_client.add_notification(
            title="Motion saved!",
            body=f"Saved motion to {saved_path}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to save motion!",
            body=str(e),
            auto_close_seconds=5.0,
            color="red",
        )
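
# Sketch of _coerce_save_path behavior, derived from the branches above
# (illustrative inputs, not test cases from the repo):
#   _coerce_save_path("",            ext=".npz") -> "output.npz"
#   _coerce_save_path("clip",        ext=".bvh") -> "clip.bvh"     # no extension: append
#   _coerce_save_path("clip.npz",    ext=".bvh") -> "clip.bvh"     # known extension: rewrite
#   _coerce_save_path("clip.backup", ext=".npz") -> "clip.backup"  # unknown extension: keep as-is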

def load_motion(client, load_path):
    session = demo.client_sessions[client.client_id]
    fps_arg = session.model_fps if session.model_fps and session.model_fps > 0 else None
    motion_dict, num_joints_motion = load_motion_file(load_path, target_fps=fps_arg)
    target_skel = registry_skeleton_for_joint_count(num_joints_motion)
    current_info = get_model_info(session.model_name)
    current_skel = current_info.skeleton if current_info is not None else None
    if current_skel != target_skel:
        dataset = current_info.dataset if current_info is not None else "RP"
        new_key = kimodo_short_key_for_skeleton_dataset(target_skel, dataset)
        if new_key is None:
            new_key = kimodo_short_key_for_skeleton_dataset(target_skel, "RP")
        if new_key is None:
            raise ValueError(
                f"No Kimodo model found for skeleton {target_skel} (motion has J={num_joints_motion})."
            )
        if new_key != session.model_name:
            gui_model_selector.set_from_short_key(new_key)
            apply_model_selection(new_key)
            _update_visibility_for_loaded_model(new_key)
            client.add_notification(
                title="Model switched",
                body=f"Switched to {new_key} to match loaded motion (J={num_joints_motion}).",
                auto_close_seconds=5.0,
                color="blue",
            )
            session = demo.client_sessions[client.client_id]
    joints_pos = motion_dict["posed_joints"].to(device=demo.device, dtype=torch.float32)
    joints_rot = motion_dict["global_rot_mats"].to(device=demo.device, dtype=torch.float32)
    foot_contacts = motion_dict.get("foot_contacts")
    if foot_contacts is not None:
        foot_contacts = foot_contacts.to(device=demo.device, dtype=torch.float32)
    # Support both batched [B, T, J, 3] and unbatched [T, J, 3]; take first sample if batched
    if joints_pos.ndim == 4:
        joints_pos = joints_pos[0]
    if joints_rot.ndim == 5:
        joints_rot = joints_rot[0]
    if foot_contacts is not None and foot_contacts.ndim == 3:
        foot_contacts = foot_contacts[0]
    # Motion must match the current model's skeleton after auto-switch
    num_joints_loaded = joints_pos.shape[1]
    num_joints_skeleton = session.skeleton.nbjoints
    if num_joints_loaded != num_joints_skeleton:
        # Backward compat: expand 30-joint SOMA motion to 77
        if (
            num_joints_loaded == 30
            and num_joints_skeleton == 77
            and isinstance(session.skeleton, SOMASkeleton77)
        ):
            from kimodo.skeleton import global_rots_to_local_rots

            skel30 = SOMASkeleton30().to(demo.device)
            if "local_rot_mats" in motion_dict:
                local_rot_30 = motion_dict["local_rot_mats"].to(device=demo.device, dtype=torch.float32)
                # Batched local rotations are [B, T, J, 3, 3]; take the first sample,
                # matching the joints_pos/joints_rot handling above.
                if local_rot_30.ndim == 5:
                    local_rot_30 = local_rot_30[0]
            else:
                local_rot_30 = global_rots_to_local_rots(joints_rot, skel30)
            local_rot_77 = skel30.to_SOMASkeleton77(local_rot_30)
            root_positions = joints_pos[:, skel30.root_idx, :]
            joints_rot, joints_pos, _ = session.skeleton.fk(local_rot_77, root_positions)
            if foot_contacts is not None and foot_contacts.shape[-1] == 4:
                foot_contacts = torch.cat(
                    [
                        foot_contacts[..., :2],
                        foot_contacts[..., 1:2],
                        foot_contacts[..., 2:4],
                        foot_contacts[..., 3:4],
                    ],
                    dim=-1,
                )
        else:
            raise ValueError(
                f"The loaded motion has {num_joints_loaded} joints but the current model "
                f"({session.model_name}) has {num_joints_skeleton} joints. "
                "Load a motion generated with the same skeleton, or switch the model to match the motion."
            )
    elif joints_rot.shape[1] != num_joints_skeleton:
        raise ValueError(
            f"Rotation data has {joints_rot.shape[1]} joints but the current model has "
            f"{num_joints_skeleton} joints. The NPZ may be corrupted or from a different skeleton."
        )
    # Apply G1 real robot projection (1-DoF per joint + axis limits) if enabled.
    if (
        "g1" in session.model_name
        and isinstance(session.skeleton, G1Skeleton34)
        and gui_real_robot_rotations_checkbox.value
    ):
        joints_pos, joints_rot = generation.apply_g1_real_robot_projection(
            session.skeleton, joints_pos, joints_rot
        )
    # Update duration and frame range based on loaded motion
    num_frames = joints_pos.shape[0]
    duration = num_frames / session.model_fps
    # Update GUI elements
    session.cur_duration = duration
    session.max_frame_idx = num_frames - 1
    # Clear existing motions and add the loaded one
    demo.clear_motions(client.client_id)
    demo.add_character_motion(
        client,
        session.skeleton,
        joints_pos,
        joints_rot,
        foot_contacts,
    )
    # Reset to frame 0
    demo.set_frame(client.client_id, 0)
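
# Note on the 4 -> 6 foot-contact expansion inside load_motion: the
# torch.cat above is equivalent to indexing the last dim with [0, 1, 1, 2, 3, 3],
# i.e. the second and fourth channels are duplicated. A compact equivalent
# (illustrative, not what the repo uses):
#   idx = torch.tensor([0, 1, 1, 2, 3, 3], device=foot_contacts.device)
#   foot_contacts = foot_contacts[..., idx]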
if ( "g1" in session.model_name and isinstance(session.skeleton, G1Skeleton34) and gui_real_robot_rotations_checkbox.value ): joints_pos, joints_rot = generation.apply_g1_real_robot_projection( session.skeleton, joints_pos, joints_rot ) # Update duration and frame range based on loaded motion num_frames = joints_pos.shape[0] duration = num_frames / session.model_fps # Update GUI elements session.cur_duration = duration session.max_frame_idx = num_frames - 1 # Clear existing motions and add the loaded one demo.clear_motions(client.client_id) demo.add_character_motion( client, session.skeleton, joints_pos, joints_rot, foot_contacts, ) # Reset to frame 0 demo.set_frame(client.client_id, 0) @gui_load_motion_button.on_click def _(event: viser.GuiEvent) -> None: event_client = event.client session = get_active_session(event_client) if session is None: return load_path = gui_load_motion_path_text.value loading_notif = event_client.add_notification( title="Loading motion...", body=f"Loading from {load_path}", loading=True, with_close_button=False, auto_close_seconds=None, ) try: load_motion(event_client, load_path) loading_notif.title = "Motion loaded!" loading_notif.body = f"Loaded motion from {load_path} ({session.max_frame_idx + 1} frames, {session.cur_duration:.2f}s)" loading_notif.loading = False loading_notif.with_close_button = True loading_notif.auto_close_seconds = 5.0 loading_notif.color = "green" except Exception as e: import traceback traceback.print_exc() loading_notif.title = "Failed to load motion!" loading_notif.body = str(e) loading_notif.loading = False loading_notif.with_close_button = True loading_notif.auto_close_seconds = 10.0 loading_notif.color = "red" def save_constraints(client, save_path): session = demo.client_sessions[client.client_id] # Keep save behavior aligned with demo frame convention: # valid frame indices are [0, max_frame_idx], so count is +1. 

def save_constraints(client, save_path):
    session = demo.client_sessions[client.client_id]
    # Keep save behavior aligned with demo frame convention:
    # valid frame indices are [0, max_frame_idx], so count is +1.
    num_frames = session.max_frame_idx + 1
    model_bundle = demo.load_model(session.model_name)
    constraints_lst = demo.compute_model_constraints_lst(session, model_bundle, num_frames)
    save_constraints_lst(save_path, constraints_lst)

@gui_save_constraints_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    if get_active_session(event_client) is None:
        return
    try:
        save_path = gui_save_constraints_path_text.value
        save_constraints(event_client, save_path)
        event_client.add_notification(
            title="Constraints saved!",
            body=f"Saved constraints to {save_path}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to save constraints!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )

def load_constraints(client, load_path):
    session = demo.client_sessions[client.client_id]
    constraints_lst = load_constraints_lst(load_path, skeleton=session.skeleton)
    # Clear existing constraints first
    with session.timeline_data["keyframe_update_lock"]:
        for constraint in list(session.constraints.values()):
            constraint.clear()
        client.timeline.clear_keyframes()
        client.timeline.clear_intervals()
    # Add loaded constraints to the session
    # We need to directly add constraint data, not read from current motion
    device = demo.device
    for constraint_obj in constraints_lst:
        constraint_type = constraint_obj.name
        # decompose the frame indices into intervals or single keyframes
        frame_indices = constraint_obj.frame_indices
        (
            intervals,
            intervals_indices,
            single_frames,
            single_frames_indices,
        ) = extract_intervals_and_singles(frame_indices)
        load_targets: list[dict] = []
        root_pos = None
        if constraint_type == "root2d":
            # smooth_root_2d is [T, 2] (x, z), convert to [T, 3] (x, 0, z)
            num_frames = constraint_obj.smooth_root_2d.shape[0]
            root_pos = torch.zeros(num_frames, 3, device=device)
            root_pos[:, 0] = constraint_obj.smooth_root_2d[:, 0]
            root_pos[:, 2] = constraint_obj.smooth_root_2d[:, 1]
            load_targets = [
                {
                    "track_name": "2D Root",
                    "constraint_track": session.constraints["2D Root"],
                }
            ]
        elif constraint_type == "fullbody":
            load_targets = [
                {
                    "track_name": "Full-Body",
                    "constraint_track": session.constraints["Full-Body"],
                }
            ]
        elif constraint_type in {
            "left-hand",
            "right-hand",
            "left-foot",
            "right-foot",
        }:
            track_name = {
                "left-hand": "Left Hand",
                "right-hand": "Right Hand",
                "left-foot": "Left Foot",
                "right-foot": "Right Foot",
            }[constraint_type]
            load_targets = [
                {
                    "track_name": track_name,
                    "constraint_track": session.constraints["End-Effectors"],
                    "joint_names": constraint_obj.joint_names,
                    "end_effector_type": constraint_type,
                }
            ]
        elif constraint_type in {"end-effector", "end-effectors"}:
            # Backward-compatible loader:
            # split a generic end-effector constraint into per-limb timeline tracks.
            joint_names_set = set(constraint_obj.joint_names)
            for jname, track_name, eff_type in [
                ("LeftHand", "Left Hand", "left-hand"),
                ("RightHand", "Right Hand", "right-hand"),
                ("LeftFoot", "Left Foot", "left-foot"),
                ("RightFoot", "Right Foot", "right-foot"),
            ]:
                if jname not in joint_names_set:
                    continue
                target_joint_names = [jname]
                if "Hips" in joint_names_set:
                    target_joint_names.append("Hips")
                load_targets.append(
                    {
                        "track_name": track_name,
                        "constraint_track": session.constraints["End-Effectors"],
                        "joint_names": target_joint_names,
                        "end_effector_type": eff_type,
                    }
                )
            if not load_targets:
                raise KeyError(
                    "No recognized end-effector joint in constraint "
                    f"joint_names={constraint_obj.joint_names}"
                )
        else:
            raise KeyError(f"Unsupported constraint type in loader: {constraint_type}")
        for target in load_targets:
            track_id = session.timeline_data["tracks_ids"][target["track_name"]]
            constraint_track = target["constraint_track"]
            # add intervals
            for (start_idx, end_idx), (start_idx_t, end_idx_t) in zip(intervals, intervals_indices):
                # Add to timeline
                interval_id = client.timeline.add_interval(track_id, start_idx, end_idx)
                session.timeline_data["intervals"][interval_id] = {
                    "track_id": track_id,
                    "start_frame_idx": start_idx,
                    "end_frame_idx": end_idx,
                    "locked": False,
                    "opacity": 1.0,
                    "value": None,
                }
                if constraint_type == "root2d":
                    constraint_track.add_interval(
                        interval_id,
                        start_idx,
                        end_idx,
                        root_pos[start_idx_t : end_idx_t + 1],
                    )
                elif constraint_type == "fullbody":
                    constraint_track.add_interval(
                        interval_id,
                        start_idx,
                        end_idx,
                        constraint_obj.global_joints_positions[start_idx_t : end_idx_t + 1],
                        constraint_obj.global_joints_rots[start_idx_t : end_idx_t + 1],
                    )
                else:
                    constraint_track.add_interval(
                        interval_id,
                        start_idx,
                        end_idx,
                        constraint_obj.global_joints_positions[start_idx_t : end_idx_t + 1],
                        constraint_obj.global_joints_rots[start_idx_t : end_idx_t + 1],
                        target["joint_names"],
                        target["end_effector_type"],
                    )
            # add keyframes
            for frame, frame_t in zip(single_frames, single_frames_indices):
                # Add to timeline
                keyframe_id = client.timeline.add_keyframe(track_id, frame)
                session.timeline_data["keyframes"][keyframe_id] = {
                    "track_id": track_id,
                    "frame": frame,
                    "locked": False,
                    "opacity": 1.0,
                    "value": None,
                }
                if constraint_type == "root2d":
                    constraint_track.add_keyframe(
                        keyframe_id,
                        frame,
                        root_pos[frame_t],
                    )
                elif constraint_type == "fullbody":
                    constraint_track.add_keyframe(
                        keyframe_id,
                        frame,
                        constraint_obj.global_joints_positions[frame_t],
                        constraint_obj.global_joints_rots[frame_t],
                    )
                else:
                    constraint_track.add_keyframe(
                        keyframe_id,
                        frame,
                        constraint_obj.global_joints_positions[frame_t],
                        constraint_obj.global_joints_rots[frame_t],
                        target["joint_names"],
                        target["end_effector_type"],
                    )

@gui_load_constraints_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    if get_active_session(event_client) is None:
        return
    try:
        load_path = gui_load_constraints_path_text.value
        load_constraints(event_client, load_path)
        session = demo.client_sessions[event_client.client_id]
        apply_constraint_overlay_visibility(session)
        event_client.add_notification(
            title="Constraints loaded!",
            body=f"Loaded constraints from {load_path}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to load constraints!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )
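
# Hedged sketch of what extract_intervals_and_singles is assumed to return,
# inferred from how load_constraints slices the constraint tensors above:
# maximal consecutive runs become intervals, isolated frames become
# keyframes, and the *_indices outputs are positions into the original
# frame_indices list (hypothetical values):
#   frame_indices = [3, 4, 5, 9, 12, 13]
#   intervals             == [(3, 5), (12, 13)]
#   intervals_indices     == [(0, 2), (4, 5)]
#   single_frames         == [9]
#   single_frames_indices == [3]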

with client.gui.add_folder("Exports", expand_by_default=False):
    with client.gui.add_folder("Screenshot", expand_by_default=False, visible=not HF_MODE):
        gui_screenshot_path_text = client.gui.add_text(
            "Save Path",
            initial_value="render.png",
            hint="Filename for the screenshot (PNG).",
        )
        gui_screenshot_button = client.gui.add_button(
            "Download Screenshot",
            hint="Capture the current canvas and download a PNG.",
        )
    with client.gui.add_folder("Video", expand_by_default=False, visible=not HF_MODE):
        gui_video_path_text = client.gui.add_text(
            "Save Path",
            initial_value="render.mp4",
            hint="Filename for the video (MP4).",
        )
        gui_video_button = client.gui.add_button(
            "Download Video",
            hint="Render every frame and download as MP4.",
        )
    with client.gui.add_folder("Motion", expand_by_default=True):
        gui_download_name_text = client.gui.add_text(
            "Name",
            initial_value="output",
            hint="Base filename to save as (extension will be added based on format if omitted).",
        )
        gui_download_format_dropdown = client.gui.add_dropdown(
            "Format",
            options=(
                ["NPZ", "CSV"]
                if "g1" in model_name.lower()
                else ["NPZ", "AMASS NPZ"]
                if "smplx" in model_name.lower()
                else ["NPZ", "BVH"]
            ),
            initial_value="NPZ",
        )
        gui_download_bvh_standard_tpose_checkbox = client.gui.add_checkbox(
            "Standard T-pose",
            initial_value=False,
            hint="For BVH export, use the standard T-pose rest skeleton.",
            visible=False,
        )
        gui_download_button = client.gui.add_button(
            "Download",
            hint="Download the current motion (format + name above).",
        )

def _download_bytes_to_browser(
    event_client: viser.ClientHandle,
    *,
    data: bytes,
    filename: str,
    mime_type: str = "application/octet-stream",
) -> None:
    """Trigger a browser download for an in-memory byte payload.

    Important: this intentionally does NOT use `showSaveFilePicker()` to avoid
    Chrome/Edge's file-write permission prompt ("this site can see edits you
    make"). If you want "always ask where to save", configure your browser
    download settings.
    """
    import base64
    import json

    # Base64 is the most robust way to move binary over our websocket JS channel.
    b64 = base64.b64encode(data).decode("ascii")
    js = f"""
    (() => {{
        const filename = {json.dumps(filename)};
        const mimeType = {json.dumps(mime_type)};
        const b64 = {json.dumps(b64)};
        // Decode base64 -> Uint8Array.
        const binStr = atob(b64);
        const bytes = new Uint8Array(binStr.length);
        for (let i = 0; i < binStr.length; i++) bytes[i] = binStr.charCodeAt(i);
        const blob = new Blob([bytes], {{ type: mimeType }});
        // Standard browser download behavior.
        const url = URL.createObjectURL(blob);
        const a = document.createElement("a");
        a.href = url;
        a.download = filename;
        document.body.appendChild(a);
        a.click();
        a.remove();
        URL.revokeObjectURL(url);
    }})();
    """
    # Reuse viser's JS execution mechanism (used for Plotly setup).
    from viser import _messages as _viser_messages

    event_client.gui._websock_interface.queue_message(  # type: ignore[attr-defined]
        _viser_messages.RunJavascriptMessage(source=js)
    )

def _motion_to_npz_bytes(motion) -> bytes:
    motion_data = _motion_to_numpy_dict(motion)
    return kimodo_npz_to_bytes(motion_data)

def _motion_to_csv_bytes(motion, session: ClientSession) -> bytes:
    motion_data = _motion_to_numpy_dict(motion)
    return g1_csv_to_bytes(motion_data, session.skeleton, demo.device)

def _motion_to_amass_npz_bytes(motion, session: ClientSession) -> bytes:
    motion_data = _motion_to_numpy_dict(motion)
    return amass_npz_to_bytes(motion_data, session.skeleton, session.model_fps)

def _get_motion_export_formats(loaded_model_name: str) -> list[str]:
    model_name_lower = (loaded_model_name or "").lower()
    if "g1" in model_name_lower:
        return ["NPZ", "CSV"]
    if "smplx" in model_name_lower:
        return ["NPZ", "AMASS NPZ"]
    return ["NPZ", "BVH"]

def _update_format_dropdown(dropdown, loaded_model_name: str) -> None:
    new_options = _get_motion_export_formats(loaded_model_name)
    current_value = str(dropdown.value)
    dropdown.options = new_options
    dropdown.value = current_value if current_value in new_options else new_options[0]

def _update_motion_export_dropdown(loaded_model_name: str) -> None:
    _update_format_dropdown(gui_download_format_dropdown, loaded_model_name)
    _update_format_dropdown(gui_save_motion_format_dropdown, loaded_model_name)
    _update_bvh_standard_tpose_visibility()

def _update_bvh_standard_tpose_visibility() -> None:
    gui_save_bvh_standard_tpose_checkbox.visible = (
        str(gui_save_motion_format_dropdown.value).upper() == "BVH"
    )
    gui_download_bvh_standard_tpose_checkbox.visible = (
        str(gui_download_format_dropdown.value).upper() == "BVH"
    )

@gui_save_motion_format_dropdown.on_update
def _(_event: viser.GuiEvent) -> None:
    _update_bvh_standard_tpose_visibility()

@gui_download_format_dropdown.on_update
def _(_event: viser.GuiEvent) -> None:
    _update_bvh_standard_tpose_visibility()

def _coerce_download_filename(raw_name: str, *, ext: str) -> str:
    """Coerce a user-entered filename to a safe basename with the desired extension.

    - If empty: uses "output{ext}"
    - If no extension: appends ext
    - If endswith a known export extension: rewrites extension to ext (prevents mismatches)
    - Any provided directory components are stripped
    """
    import os

    name = (raw_name or "").strip()
    name = os.path.basename(name.replace("\\", "/"))
    if name == "":
        return f"output{ext}"
    known_exts = (".npz", ".bvh", ".csv", ".png", ".mp4")
    lower = name.lower()
    if lower.endswith(known_exts):
        return os.path.splitext(name)[0] + ext
    root, cur_ext = os.path.splitext(name)
    if cur_ext == "":
        return name + ext
    return name
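
# Assumed examples of _coerce_download_filename, traced through the code
# above (illustrative, not repo test cases):
#   _coerce_download_filename("",            ext=".npz") -> "output.npz"
#   _coerce_download_filename("runs/clip",   ext=".bvh") -> "clip.bvh"      # directories stripped
#   _coerce_download_filename("clip.mp4",    ext=".npz") -> "clip.npz"      # known extension rewritten
#   _coerce_download_filename("clip.custom", ext=".npz") -> "clip.custom"   # unknown extension kept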

def _get_render_size(event_client: viser.ClientHandle) -> tuple[int, int]:
    width = int(event_client.camera.image_width)
    height = int(event_client.camera.image_height)
    if width <= 0 or height <= 0:
        # Fall back to a reasonable default if the camera hasn't synced yet.
        return (1280, 720)
    return (width, height)

def _round_up_to_multiple(value: int, multiple: int) -> int:
    if multiple <= 0:
        return value
    return ((value + multiple - 1) // multiple) * multiple

def _download_canvas_to_browser(event_client: viser.ClientHandle, *, filename: str) -> None:
    """Use the client-side canvas save path to avoid server-side renders."""
    import json

    js = f"""
    (() => {{
        const filename = {json.dumps(filename)};
        const canvases = Array.from(document.querySelectorAll("canvas"));
        if (!canvases.length) {{
            console.error("No canvases found to save.");
            return;
        }}
        // Pick the largest canvas by area (usually the main 3D view).
        const canvas = canvases.reduce((best, cur) => {{
            const bestArea = (best?.width || 0) * (best?.height || 0);
            const curArea = (cur?.width || 0) * (cur?.height || 0);
            return curArea > bestArea ? cur : best;
        }}, null);
        if (!canvas) {{
            console.error("No canvas selected to save.");
            return;
        }}
        canvas.toBlob((blob) => {{
            if (!blob) {{
                console.error("Export failed");
                return;
            }}
            const url = URL.createObjectURL(blob);
            const a = document.createElement("a");
            a.href = url;
            a.download = filename;
            document.body.appendChild(a);
            a.click();
            a.remove();
            URL.revokeObjectURL(url);
        }}, "image/png");
    }})();
    """
    from viser import _messages as _viser_messages

    event_client.gui._websock_interface.queue_message(  # type: ignore[attr-defined]
        _viser_messages.RunJavascriptMessage(source=js)
    )

@gui_screenshot_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    if get_active_session(event_client) is None:
        return
    try:
        filename = _coerce_download_filename(
            str(gui_screenshot_path_text.value),
            ext=".png",
        )
        _download_canvas_to_browser(event_client, filename=filename)
        event_client.add_notification(
            title="Screenshot download started",
            body=f"Saving {filename}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to download screenshot!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )
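
# Why the video path below rounds dimensions up to a multiple of 16: H.264
# encodes in 16x16 macroblocks, and ffmpeg warns (or silently pads) when the
# frame size isn't divisible by 16. Illustrative values:
#   _round_up_to_multiple(1278, 16) == 1280
#   _round_up_to_multiple(720, 16)  == 720   # already aligned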

@gui_video_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None:
        return
    recording_notification: viser.NotificationHandle | None = None
    try:
        recording_notification = event_client.add_notification(
            title="Recording video...",
            body="Saving frames, please wait.",
            loading=True,
            with_close_button=False,
            auto_close_seconds=None,
            color="blue",
        )
        event_client.timeline.disable_constraints()
        width, height = _get_render_size(event_client)
        # Avoid ffmpeg macro block resizing warnings.
        width = _round_up_to_multiple(width, 16)
        height = _round_up_to_multiple(height, 16)
        original_frame = session.frame_idx
        frames = []
        for frame_idx in range(session.max_frame_idx + 1):
            demo.set_frame(
                event_client.client_id,
                frame_idx,
                update_timeline=True,
            )
            frames.append(
                event_client.get_render(
                    height=height,
                    width=width,
                    transport_format="jpeg",
                )
            )
        # Restore the original frame (and timeline).
        demo.set_frame(event_client.client_id, original_frame)

        import imageio.v3 as iio

        filename = _coerce_download_filename(
            str(gui_video_path_text.value),
            ext=".mp4",
        )
        # "<bytes>" makes imwrite return the encoded MP4 as a bytes object
        # instead of writing to disk.
        payload = iio.imwrite(
            "<bytes>",
            frames,
            extension=".mp4",
            fps=float(session.model_fps),
            codec="h264",
            plugin="pyav",
        )
        event_client.send_file_download(filename, payload, save_immediately=True)
        event_client.add_notification(
            title="Video download started",
            body=f"Saving {filename}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to download video!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )
    finally:
        event_client.timeline.enable_constraints()
        if recording_notification is not None:
            recording_notification.remove()

@gui_download_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None:
        return
    motion = _get_primary_motion(session)
    try:
        fmt = str(gui_download_format_dropdown.value).upper()
        raw_name = str(gui_download_name_text.value)
        if fmt == "BVH":
            filename = _coerce_download_filename(raw_name, ext=".bvh")
            payload = motion_to_bvh_bytes(
                motion.joints_local_rot,
                motion.joints_pos[:, session.skeleton.root_idx, :],  # root positions
                skeleton=session.skeleton,
                fps=float(session.model_fps),
                standard_tpose=bool(gui_download_bvh_standard_tpose_checkbox.value),
            )
            mime = "text/plain"
        elif fmt == "CSV":
            filename = _coerce_download_filename(raw_name, ext=".csv")
            payload = _motion_to_csv_bytes(motion, session)
            mime = "text/csv"
        elif fmt == "AMASS NPZ":
            filename = _coerce_download_filename(raw_name, ext=".npz")
            payload = _motion_to_amass_npz_bytes(motion, session)
            mime = "application/octet-stream"
        else:
            # Default to NPZ (most common and matches existing save/load).
            filename = _coerce_download_filename(raw_name, ext=".npz")
            payload = _motion_to_npz_bytes(motion)
            mime = "application/octet-stream"
        _download_bytes_to_browser(
            event_client,
            data=payload,
            filename=filename,
            mime_type=mime,
        )
        event_client.add_notification(
            title="Download started",
            body=f"Saving {filename}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to download motion!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )

@gui_save_example_button.on_click
def _(event: viser.GuiEvent) -> None:
    from kimodo.tools import save_json

    event_client = event.client
    session = get_active_session(event_client)
    if session is None:
        return
    save_dir = gui_save_example_path_text.value
    if os.path.exists(save_dir):
        event_client.add_notification(
            title="Failed to save example!",
            body="Example directory already exists",
            auto_close_seconds=10.0,
            color="red",
        )
        return
    try:
        os.makedirs(save_dir)
        # save the constraints
        constraint_path = os.path.join(save_dir, "constraints.json")
        save_constraints(event_client, constraint_path)
        # save the motion
        motion_path = os.path.join(save_dir, "motion.npz")
        save_motion(event_client, motion_path, "NPZ")
        # save the gui metadata
        meta_path = os.path.join(save_dir, "meta.json")
        prompt_texts = []
        prompt_durations_sec = []
        prompt_values = sorted(
            [x for x in client.timeline._prompts.values()],
            key=lambda x: x.start_frame,
        )
        for i, prompt in enumerate(prompt_values):
            prompt_texts.append(prompt.text)
            # Match demo/generation convention:
            # non-last prompts: [start, end) ; last prompt: [start, end].
            n_frames = prompt.end_frame - prompt.start_frame
            if i == len(prompt_values) - 1:
                n_frames += 1
            prompt_durations_sec.append(n_frames / session.model_fps)
        if len(prompt_texts) == 1:
            meta_info = {
                "text": prompt_texts[0],
                "duration": prompt_durations_sec[0],
            }
        else:
            meta_info = {
                "texts": prompt_texts,
                "durations": prompt_durations_sec,
            }
        meta_info["num_samples"] = gui_num_samples_slider.value
        meta_info["seed"] = gui_seed.value
        meta_info["diffusion_steps"] = gui_diffusion_steps_slider.value
        meta_info["cfg"] = {
            "enabled": gui_cfg_checkbox.value,
            "text_weight": gui_cfg_text_weight_slider.value,
            "constraint_weight": gui_cfg_constraint_weight_slider.value,
        }
        save_json(meta_path, meta_info)
        # update the example dropdown
        session.example_dict = viser_utils.load_example_cases(session.examples_base_dir)
        update_examples_dropdown(session.example_dict, keep_selection=True)
        event_client.add_notification(
            title="Example saved!",
            body=f"Saved example to {save_dir}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to save example!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )

def set_new_duration(client_id, new_duration):
    session = demo.client_sessions[client_id]
    session.cur_duration = new_duration
    update_duration_gui(new_duration)
    session.max_frame_idx = int(session.cur_duration * session.model_fps - 1)
    if session.frame_idx > session.max_frame_idx:
        demo.set_frame(client_id, session.max_frame_idx)
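
# Frame-count convention used by set_new_duration (and save_constraints):
# durations are seconds, frame indices span [0, max_frame_idx]. E.g. at
# 30 fps a 4.0 s duration gives int(4.0 * 30 - 1) = 119, i.e. 120 frames.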

def apply_model_selection(new_model_name: str) -> None:
    session = demo.client_sessions[client_id]
    if new_model_name == session.model_name:
        return
    session.playing = False  # Pause playback when switching models.
    old_model_fps = session.model_fps
    old_duration = session.cur_duration
    old_prompts = [
        (prompt.text, prompt.start_frame, prompt.end_frame)
        for prompt in client.timeline._prompts.values()
    ]
    old_default_zoom_frames = client.timeline._default_num_frames_zoom
    old_max_zoom_frames = client.timeline._max_frames_zoom
    model_bundle = demo.load_model(new_model_name)
    # Clear motions and constraints when switching models.
    if session.edit_mode and session.motions:
        exit_editing_mode(session)
        session.edit_mode = False
    demo.clear_motions(client_id)
    with session.timeline_data["keyframe_update_lock"]:
        for constraint in list(session.constraints.values()):
            constraint.clear()
        session.constraints = demo.build_constraint_tracks(client, model_bundle.skeleton)
        session.timeline_data["keyframes"] = {}
        session.timeline_data["intervals"] = {}
        client.timeline.clear_keyframes()
        client.timeline.clear_intervals()
    session.model_name = new_model_name
    session.model_fps = model_bundle.model_fps
    session.skeleton = model_bundle.skeleton
    session.motion_rep = model_bundle.motion_rep
    session.cur_duration = old_duration
    session.max_frame_idx = int(session.cur_duration * session.model_fps - 1)
    session.frame_idx = 0
    session.edit_mode = False
    demo.set_timeline_defaults(client.timeline, session.model_fps)
    client.timeline.set_current_frame(0)
    gui_model_fps.value = session.model_fps
    update_duration_gui(session.cur_duration)
    if old_model_fps > 0:
        default_zoom_seconds = old_default_zoom_frames / old_model_fps
        max_zoom_seconds = old_max_zoom_frames / old_model_fps
        new_default_zoom = int(round(default_zoom_seconds * session.model_fps))
        new_max_zoom = int(round(max_zoom_seconds * session.model_fps))
        new_default_zoom = max(1, new_default_zoom)
        new_max_zoom = max(new_default_zoom, new_max_zoom)
        client.timeline.set_zoom_settings(
            default_num_frames_zoom=new_default_zoom,
            max_frames_zoom=new_max_zoom,
        )
    client.timeline.clear_prompts()
    if old_prompts and old_model_fps > 0:
        for i, (prompt_text, start_frame, end_frame) in enumerate(old_prompts):
            start_sec = start_frame / old_model_fps
            end_sec = end_frame / old_model_fps
            new_start = int(round(start_sec * session.model_fps))
            new_end = int(round(end_sec * session.model_fps))
            new_start = max(0, min(new_start, session.max_frame_idx))
            new_end = max(new_start, min(new_end, session.max_frame_idx))
            color = PROMPT_COLORS[i % len(PROMPT_COLORS)]
            client.timeline.add_prompt(prompt_text, new_start, new_end, color=color)
    session.examples_base_dir = demo.get_examples_base_dir(new_model_name, absolute=True)
    session.example_dict = viser_utils.load_example_cases(session.examples_base_dir)
    update_examples_dropdown(session.example_dict, keep_selection=False)
    gui_save_example_path_text.value = os.path.join(
        demo.get_examples_base_dir(new_model_name, absolute=True),
        "custom_example_1",
    )
    gui_load_example_path_text.value = os.path.join(
        demo.get_examples_base_dir(new_model_name, absolute=True),
        "custom_example_1",
    )
    demo.add_character_motion(client, session.skeleton)
    apply_constraint_overlay_visibility(session)
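
# Sketch of the prompt rescaling in apply_model_selection above: frame
# bounds are converted to seconds at the old fps and back to frames at the
# new fps, then clamped into [0, max_frame_idx]. E.g. a prompt spanning
# frames [15, 45] at 30 fps lands on [10, 30] after switching to a 20 fps
# model, since round(15 / 30 * 20) == 10 and round(45 / 30 * 20) == 30.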

def _update_version_and_display_from_dataset_skeleton() -> None:
    dataset_ui = gui_dataset_selector.value
    skeleton_display = gui_skeleton_selector.value
    skeleton_val = get_skeleton_key_from_display_name(skeleton_display)
    if skeleton_val is None:
        return
    models = get_models_for_dataset_skeleton(dataset_ui, skeleton_val, family="Kimodo")
    if not models:
        return
    gui_version_selector.options = [m.display_name for m in models]
    gui_version_selector.value = models[0].display_name
    gui_version_selector.visible = len(models) > 1
    gui_model_display.content = f"**Model:** {models[0].display_name}"

def _update_visibility_for_loaded_model(loaded_model_name: str) -> None:
    """Update model-specific controls from the currently loaded model only."""
    if not loaded_model_name:
        return
    _update_motion_export_dropdown(loaded_model_name)
    gui_use_soma_layer_checkbox.visible = "soma" in loaded_model_name
    _is_g1 = "g1" in loaded_model_name
    gui_real_robot_rotations_checkbox.visible = _is_g1
    gui_postprocess_checkbox.visible = not _is_g1
    gui_root_margin.visible = not _is_g1 and gui_postprocess_checkbox.value
    if _is_g1:
        gui_gizmo_space_dropdown.value = "Local"
    gui_gizmo_space_dropdown.visible = not _is_g1
    gui_gizmo_space_dropdown.disabled = _is_g1

def _on_load_model_click(event: viser.GuiEvent) -> None:
    """Load the currently selected model (called from Load model button)."""
    if get_active_session(event.client) is None:
        return
    new_model_name = gui_model_selector.value
    if not new_model_name:
        return
    info = get_model_info(new_model_name)
    if info is None:
        return
    session = demo.client_sessions[event.client.client_id]
    if new_model_name == session.model_name:
        return
    loading_notif = event.client.add_notification(
        title="Loading model...",
        body=f"Loading {info.display_name}",
        loading=True,
        with_close_button=False,
    )
    try:
        apply_model_selection(new_model_name)
        _update_visibility_for_loaded_model(new_model_name)
        loading_notif.title = "Model loaded"
        loading_notif.body = f"{info.display_name} is ready."
        loading_notif.loading = False
        loading_notif.with_close_button = True
        loading_notif.auto_close_seconds = 5.0
        loading_notif.color = "green"
    except Exception as e:
        loading_notif.loading = False
        loading_notif.with_close_button = True
        event.client.add_notification(
            title="Model failed to load",
            body=str(e),
            color="red",
            auto_close_seconds=10.0,
        )
        gui_model_selector.set_from_short_key(session.model_name)

@gui_load_model_button.on_click
def _(event: viser.GuiEvent) -> None:
    _on_load_model_click(event)

@gui_dataset_selector.on_update
def _(event: viser.GuiEvent) -> None:
    if get_active_session(event.client) is None:
        return
    skeleton_labels = get_allowed_skeleton_labels(gui_dataset_selector.value)
    gui_skeleton_selector.options = skeleton_labels
    gui_skeleton_selector.value = skeleton_labels[0] if skeleton_labels else ""
    _update_version_and_display_from_dataset_skeleton()

@gui_skeleton_selector.on_update
def _(event: viser.GuiEvent) -> None:
    if get_active_session(event.client) is None:
        return
    _update_version_and_display_from_dataset_skeleton()

@gui_version_selector.on_update
def _(event: viser.GuiEvent) -> None:
    if get_active_session(event.client) is None:
        return
    info = get_model_info(gui_model_selector.value)
    if info is not None:
        gui_model_display.content = f"**Model:** {info.display_name}"
demo.clear_motions(event.client.client_id) if current_motion is None: demo.add_character_motion(event.client, session.skeleton) else: demo.add_character_motion( event.client, session.skeleton, current_motion.joints_pos, current_motion.joints_rot, current_motion.foot_contacts, ) demo.set_frame(event.client.client_id, current_frame_idx) except Exception as e: print(e) event.client.add_notification( title="SOMA layer failed", body=str(e), color="red", auto_close_seconds=10.0, ) gui_use_soma_layer_checkbox.value = not gui_use_soma_layer_checkbox.value finally: loading_notif.loading = False loading_notif.with_close_button = True loading_notif.auto_close_seconds = 2.0 @gui_real_robot_rotations_checkbox.on_update def _(event: viser.GuiEvent) -> None: session = get_active_session(event.client) if session is None or "g1" not in session.model_name: return if not isinstance(session.skeleton, G1Skeleton34) or not session.motions: return if not gui_real_robot_rotations_checkbox.value: return # Reproject all displayed G1 motions to real robot DoF (1-DoF per joint + axis limits). from kimodo.skeleton import global_rots_to_local_rots current_frame_idx = session.frame_idx for motion in session.motions.values(): if motion.length <= 1: continue rest_pos = motion.joints_pos[0:1] rest_rot = motion.joints_rot[0:1] same_as_rest = (motion.joints_pos - rest_pos).abs().max().item() < 1e-6 and ( motion.joints_rot - rest_rot ).abs().max().item() < 1e-6 if same_as_rest: continue new_pos, new_rot = generation.apply_g1_real_robot_projection( session.skeleton, motion.joints_pos, motion.joints_rot, ) motion.joints_pos = new_pos motion.joints_rot = new_rot motion.joints_local_rot = global_rots_to_local_rots(new_rot, session.skeleton) # Refresh skeleton and skinned mesh caches so the viz uses new positions. motion.precompute_mesh_info() demo.set_frame(event.client.client_id, current_frame_idx) event.client.add_notification( title="Real robot projection applied", body="The motion is projected to G1 real robot DoF (1-DoF per joint, clamped to axis limits).", auto_close_seconds=4.0, color="green", ) def load_example_from_path( event_client: viser.ClientHandle, example_path: str, load_gt: bool = False, ) -> None: from kimodo.meta import parse_prompts_from_meta from kimodo.tools import load_json session = get_active_session(event_client) if session is None: return # Pause playback when loading an example. session.playing = False if not os.path.isdir(example_path): event_client.add_notification( title="Example path not found", body=f"Directory does not exist: {example_path}", auto_close_seconds=5.0, color="red", ) return # Long motions trigger a skinning precompute that can take several # seconds; show a persistent "loading" notification so the user # knows the app isn't frozen. Cleared in the finally block below. loading_notif = event_client.add_notification( title="Loading example...", body=f"Loading {os.path.basename(example_path.rstrip(os.sep))}. 

def load_example_from_path(
    event_client: viser.ClientHandle,
    example_path: str,
    load_gt: bool = False,
) -> None:
    from kimodo.meta import parse_prompts_from_meta
    from kimodo.tools import load_json

    session = get_active_session(event_client)
    if session is None:
        return
    # Pause playback when loading an example.
    session.playing = False
    if not os.path.isdir(example_path):
        event_client.add_notification(
            title="Example path not found",
            body=f"Directory does not exist: {example_path}",
            auto_close_seconds=5.0,
            color="red",
        )
        return
    # Long motions trigger a skinning precompute that can take several
    # seconds; show a persistent "loading" notification so the user
    # knows the app isn't frozen. Cleared in the finally block below.
    loading_notif = event_client.add_notification(
        title="Loading example...",
        body=f"Loading {os.path.basename(example_path.rstrip(os.sep))}. "
        "This may take a moment for long motions.",
        loading=True,
        with_close_button=False,
    )
    try:
        # constraints
        constraints_path = os.path.join(example_path, "constraints.json")
        if os.path.exists(constraints_path):
            load_constraints(event_client, constraints_path)
        else:
            # clear all existing constraints
            with session.timeline_data["keyframe_update_lock"]:
                for constraint in list(session.constraints.values()):
                    constraint.clear()
                event_client.timeline.clear_keyframes()
                event_client.timeline.clear_intervals()
        # motion
        motion_filename = "gt_motion.npz" if load_gt else "motion.npz"
        motion_path = os.path.join(example_path, motion_filename)
        if os.path.exists(motion_path):
            load_motion(event_client, motion_path)
        # metadata
        meta_path = os.path.join(example_path, "meta.json")
        if os.path.exists(meta_path):
            meta_info = load_json(meta_path)
            event_client.timeline.clear_prompts()
            texts, durations_sec = parse_prompts_from_meta(meta_info)
            fps = session.model_fps
            # Convert durations (seconds) to consecutive frame bounds
            num_frames = 0
            frame_bounds = []
            for i, d in enumerate(durations_sec):
                n_frames = max(1, int(round(d * fps)))
                start_frame = num_frames
                # Inverse of compute_prompt_num_frames():
                # non-last prompts end at next prompt start (exclusive),
                # last prompt includes its end frame.
                if i == len(durations_sec) - 1:
                    end_frame = num_frames + n_frames - 1
                else:
                    end_frame = num_frames + n_frames
                frame_bounds.append((start_frame, end_frame))
                num_frames += n_frames
            # Adapt timeline zoom to the loaded motion.
            target_visible_frames = int(math.ceil(1.10 * num_frames))
            event_client.timeline.set_zoom_settings(
                default_num_frames_zoom=target_visible_frames,
            )
            for i, (prompt_text, (start_frame, end_frame)) in enumerate(zip(texts, frame_bounds)):
                color = PROMPT_COLORS[i % len(PROMPT_COLORS)]
                event_client.timeline.add_prompt(prompt_text, start_frame, end_frame, color=color)
            update_duration_auto()
            # Only load optional fields if present
            if "num_samples" in meta_info:
                gui_num_samples_slider.value = meta_info["num_samples"]
            if "seed" in meta_info:
                gui_seed.value = meta_info["seed"]
            if "diffusion_steps" in meta_info:
                gui_diffusion_steps_slider.value = meta_info["diffusion_steps"]
            if "cfg" in meta_info:
                cfg = meta_info["cfg"]
                if "enabled" in cfg:
                    gui_cfg_checkbox.value = cfg["enabled"]
                if "text_weight" in cfg:
                    gui_cfg_text_weight_slider.value = cfg["text_weight"]
                if "constraint_weight" in cfg:
                    gui_cfg_constraint_weight_slider.value = cfg["constraint_weight"]
        # Set frame to 0 when example is loaded.
        session.frame_idx = 0
        event_client.timeline.set_current_frame(0)
        demo.set_frame(event_client.client_id, 0)
        event_client.add_notification(
            title="Example loaded!",
            body=f"Loaded example from {example_path}",
            auto_close_seconds=5.0,
            color="green",
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        event_client.add_notification(
            title="Failed to load example!",
            body=str(e),
            auto_close_seconds=10.0,
            color="red",
        )
    finally:
        loading_notif.remove()

@gui_load_example_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None:
        return
    if not session.example_dict or (gui_examples_dropdown.value not in session.example_dict):
        event_client.add_notification(
            title="No examples available",
            body="No examples found for the selected model.",
            auto_close_seconds=5.0,
            color="red",
        )
        return
    example_path = session.example_dict[gui_examples_dropdown.value]
    load_example_from_path(event_client, example_path, gui_load_gt_checkbox.value)

@gui_load_example_from_path_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None:
        return
    example_path = gui_load_example_path_text.value
    if not example_path:
        event_client.add_notification(
            title="No example path",
            body="Please provide an example directory.",
            auto_close_seconds=5.0,
            color="red",
        )
        return
    load_example_from_path(event_client, example_path, gui_load_gt_checkbox.value)

@gui_cfg_checkbox.on_update
def _(_) -> None:
    if not demo.client_active(client_id):
        return
    val = gui_cfg_checkbox.value
    gui_cfg_text_weight_slider.visible = val
    gui_cfg_constraint_weight_slider.visible = val

def exit_editing_mode(session: ClientSession):
    gui_edit_constraint_button.label = "Enter Editing Mode"
    gui_generate_button.disabled = False
    gui_generate_button.label = "Generate"
    gui_reset_constraint_button.disabled = True
    if "g1" in session.model_name:
        gui_gizmo_space_dropdown.value = "Local"
        gui_gizmo_space_dropdown.disabled = True
        gui_gizmo_space_dropdown.visible = False
    else:
        gui_gizmo_space_dropdown.disabled = False
        gui_gizmo_space_dropdown.visible = True
    gui_undo_drag_button.disabled = True
    gui_use_soma_layer_checkbox.disabled = False
    session.edit_mode_snapshot = None
    session.undo_drag_snapshot = None
    motion = list(session.motions.values())[0]
    motion.clear_all_gizmos()
    motion.character.set_skinned_mesh_wireframe(False)
    motion.character.set_skeleton_visibility(False)
    motion.character.set_skinned_mesh_visibility(True)
    motion.character.set_skinned_mesh_opacity(1.0)
    session.gui_elements.gui_viz_skinned_mesh_opacity_slider.value = 1.0
    # If the path is dense, put the motion back on the path
    if "2D Root" in session.constraints and session.constraints["2D Root"].dense_path:
        _update_dense_path(motion, session)
    gui_viz_skinned_mesh_checkbox.value = True
    gui_viz_skeleton_checkbox.value = False

# enter editing mode callback
@gui_edit_constraint_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None:
        return
    session.edit_mode = not session.edit_mode
    edit_alert = "Entered editing mode"
    no_edit_alert = "Exited editing mode"
    edit_message = "You can now modify pose or path constraints."
    no_edit_message = "You can now generate motions."
    event_client.add_notification(
        title=edit_alert if session.edit_mode else no_edit_alert,
        body=edit_message if session.edit_mode else no_edit_message,
        auto_close_seconds=10.0,
        color="blue",
    )
    if session.edit_mode:
        gui_edit_constraint_button.label = "Exit Editing Mode"
        gui_generate_button.disabled = True
        gui_generate_button.label = "Generate Disabled In Editing Mode"
        if "g1" in session.model_name:
            gui_gizmo_space_dropdown.value = "Local"
            gui_gizmo_space_dropdown.disabled = True
        gui_use_soma_layer_checkbox.disabled = True
        assert len(session.motions) == 1, "Only one motion allowed in edit mode"
        motion = list(session.motions.values())[0]
        snapshot_frame_idx = min(session.frame_idx, motion.length - 1)
        session.edit_mode_snapshot = {}
        ensure_edit_snapshot(session, motion, snapshot_frame_idx)
        gui_reset_constraint_button.disabled = False
        motion.character.set_skeleton_visibility(True)
        # motion.character.set_skinned_mesh_wireframe(True)
        motion.character.set_skinned_mesh_opacity(0.65)
        session.gui_elements.gui_viz_skinned_mesh_opacity_slider.value = 0.65
        motion.character.set_skinned_mesh_visibility(True)
        gui_viz_skinned_mesh_checkbox.value = True
        gui_viz_skeleton_checkbox.value = True

        # need gizmos for root translation and individual joints
        def _on_root2d_gizmo_release():
            if "2D Root" in session.constraints and session.constraints["2D Root"].dense_path:
                mot = list(session.motions.values())[0]
                _update_dense_path(mot, session)

        def _on_gizmo_drag_start():
            mot = list(session.motions.values())[0]
            frame_idx = min(session.frame_idx, mot.length - 1)
            session.undo_drag_snapshot = {
                "frame_idx": frame_idx,
                "joints_pos": mot.get_joints_pos(frame_idx),
                "joints_rot": mot.get_joints_rot(frame_idx),
            }
            gui_undo_drag_button.disabled = False

        motion.add_root_translation_gizmo(
            session.constraints,
            on_2d_root_drag_end=_on_root2d_gizmo_release,
            on_drag_start=_on_gizmo_drag_start,
        )
        gizmo_space = "local" if "g1" in session.model_name else gui_gizmo_space_dropdown.value.lower()
        motion.add_joint_gizmos(
            session.constraints,
            space=gizmo_space,
            on_drag_start=_on_gizmo_drag_start,
        )
    else:
        exit_editing_mode(session)

@gui_reset_constraint_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None or not session.edit_mode_snapshot:
        return
    if not session.motions:
        return
    motion = list(session.motions.values())[0]
    snapshot_frame_idx = min(session.frame_idx, motion.length - 1)
    if snapshot_frame_idx not in session.edit_mode_snapshot:
        return
    motion.update_pose_at_frame(
        snapshot_frame_idx,
        joints_pos=session.edit_mode_snapshot[snapshot_frame_idx]["joints_pos"],
        joints_rot=session.edit_mode_snapshot[snapshot_frame_idx]["joints_rot"],
    )
    demo.set_frame(event_client.client_id, snapshot_frame_idx, update_timeline=False)

@gui_undo_drag_button.on_click
def _(event: viser.GuiEvent) -> None:
    event_client = event.client
    session = get_active_session(event_client)
    if session is None or session.undo_drag_snapshot is None:
        return
    if not session.motions:
        return
    motion = list(session.motions.values())[0]
    frame_idx = session.undo_drag_snapshot["frame_idx"]
    motion.update_pose_at_frame(
        frame_idx,
        joints_pos=session.undo_drag_snapshot["joints_pos"],
        joints_rot=session.undo_drag_snapshot["joints_rot"],
    )
    demo.set_frame(event_client.client_id, frame_idx, update_timeline=False)
    session.undo_drag_snapshot = None
    gui_undo_drag_button.disabled = True

def validate_interval(start_frame_idx: int, end_frame_idx: int, max_frame_idx: int) -> bool:
    if start_frame_idx < 0 or start_frame_idx > max_frame_idx:
        return False
    if end_frame_idx < 0 or end_frame_idx > max_frame_idx:
        return False
    if end_frame_idx < start_frame_idx:
        return False
    return True

def clamp_interval_to_range(
    start_frame_idx: int, end_frame_idx: int, max_frame_idx: int
) -> Optional[tuple[int, int]]:
    if end_frame_idx < 0 or start_frame_idx > max_frame_idx:
        return None
    start_clamped = max(0, start_frame_idx)
    end_clamped = min(max_frame_idx, end_frame_idx)
    if end_clamped < start_clamped:
        return None
    return start_clamped, end_clamped
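
# Illustrative behavior of the two helpers above:
#   validate_interval(0, 10, 100)          -> True
#   validate_interval(0, 101, 100)         -> False     # end past the last frame
#   clamp_interval_to_range(-5, 10, 100)   -> (0, 10)   # clamp start up
#   clamp_interval_to_range(90, 120, 100)  -> (90, 100) # clamp end down
#   clamp_interval_to_range(120, 130, 100) -> None      # fully out of range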
Couldn't add constraint.") return # collect input args for the constraint based on which track it is if is_interval: constraint_kwargs = { "interval_id": constraint_id, "start_frame_idx": start_frame_idx, "end_frame_idx": end_frame_idx, } else: constraint_kwargs = { "keyframe_id": constraint_id, "frame_idx": start_frame_idx, } if constraint_type in ["Full-Body", "End-Effectors"]: constraint_kwargs["joints_pos"] = motion.get_joints_pos(start_frame_idx, end_frame_idx) constraint_kwargs["joints_rot"] = motion.get_joints_rot(start_frame_idx, end_frame_idx) if constraint_type == "End-Effectors": constraint_kwargs["joint_names"] = joint_names constraint_kwargs["end_effector_type"] = end_effector_type elif constraint_type == "2D Root": constraint_kwargs["root_pos"] = motion.get_projected_root_pos(start_frame_idx, end_frame_idx) # add the keyframe(s) to the constraint track constraint = session.constraints[constraint_type] if is_interval: constraint.add_interval(**constraint_kwargs) else: constraint.add_keyframe(**constraint_kwargs) apply_constraint_overlay_visibility(session) if verbose: client.add_notification( title="Constraint added", body="", auto_close_seconds=5.0, color="blue", ) # timeline callbacks for keyframes and intervals @client.timeline.on_keyframe_add def _(keyframe_id: str, track_id: str, frame: int): """Called when a keyframe is added to a track.""" if not demo.client_active(client_id): return session = demo.client_sessions[client_id] with session.timeline_data["keyframe_update_lock"]: constraint_type = session.timeline_data["tracks"][track_id]["name"] add_constraint_callback( keyframe_id, constraint_type, (frame, frame), verbose=False, ) keyframe_data = client.timeline._keyframes.get(keyframe_id) session.timeline_data["keyframes"][keyframe_id] = { "frame": frame, "track_id": track_id, "locked": bool(keyframe_data.locked) if keyframe_data is not None else False, "opacity": keyframe_data.opacity if keyframe_data is not None else 1.0, "value": keyframe_data.value if keyframe_data is not None else None, } # Update smooth path when adding a keyframe (single action, not drag). 
if constraint_type == "2D Root" and session.constraints["2D Root"].dense_path: motion = list(session.motions.values())[0] _update_dense_path(motion, session) @client.timeline.on_interval_add def handle_interval_add(interval_id: str, track_id: str, start_frame: int, end_frame: int): """Called when an interval is added to a track.""" if not demo.client_active(client_id): return session = demo.client_sessions[client_id] with session.timeline_data["keyframe_update_lock"]: constraint_type = session.timeline_data["tracks"][track_id]["name"] add_constraint_callback( interval_id, constraint_type, (start_frame, end_frame), verbose=False, ) interval_data = client.timeline._intervals.get(interval_id) session.timeline_data["intervals"][interval_id] = { "track_id": track_id, "start_frame_idx": start_frame, "end_frame_idx": end_frame, "locked": bool(interval_data.locked) if interval_data is not None else False, "opacity": interval_data.opacity if interval_data is not None else 1.0, "value": interval_data.value if interval_data is not None else None, } if constraint_type == "2D Root" and session.constraints["2D Root"].dense_path: motion = list(session.motions.values())[0] _update_dense_path(motion, session) def remove_constraint_callback( constraint_id: str, constraint_type: str, frame_range: tuple[int, int], verbose: bool = True, ) -> None: if not demo.client_active(client_id): return session = demo.client_sessions[client_id] session.updating_motions = True is_interval = frame_range[1] != frame_range[0] start_frame_idx = int(frame_range[0]) end_frame_idx = int(frame_range[1]) if is_interval: clamped = clamp_interval_to_range(start_frame_idx, end_frame_idx, session.max_frame_idx) if clamped is None: return start_frame_idx, end_frame_idx = clamped else: if not validate_interval(start_frame_idx, end_frame_idx, session.max_frame_idx): print("Invalid interval! 
Couldn't remove constraint.") return if constraint_type in [ "Left Hand", "Right Hand", "Left Foot", "Right Foot", ]: constraint_type = "End-Effectors" constraint = session.constraints[constraint_type] if is_interval: constraint.remove_interval(constraint_id, start_frame_idx, end_frame_idx) else: constraint.remove_keyframe(constraint_id, start_frame_idx) if verbose: client.add_notification( title="Constraint removed", body="", auto_close_seconds=5.0, color="blue", ) @client.timeline.on_keyframe_move def handle_keyframe_move(keyframe_id: str, new_frame: int): """Called when a keyframe is moved to a new frame.""" # print(f"Keyframe moved: {keyframe_id} to frame {new_frame}") if not demo.client_active(client_id): return session = demo.client_sessions[client_id] # Cancel any pending timer for this keyframe timeline_data = session.timeline_data with timeline_data["keyframe_update_lock"]: if keyframe_id in timeline_data["keyframe_move_timers"]: timeline_data["keyframe_move_timers"][keyframe_id].cancel() # Store the latest target frame timeline_data["pending_keyframe_moves"][keyframe_id] = new_frame # Create a new timer to execute the actual move after a delay # This debounces rapid movements - only execute when user stops moving timer = threading.Timer( 0.03, # 10ms delay - adjust as needed _execute_keyframe_move, args=(client_id, keyframe_id, new_frame, session), ) timeline_data["keyframe_move_timers"][keyframe_id] = timer timer.start() def _execute_keyframe_move( client_id: int, keyframe_id: str, new_frame: int, session: ClientSession, ): """Actually execute the keyframe move operation (called after debounce delay).""" timeline_data = session.timeline_data with timeline_data["keyframe_update_lock"]: # Check if this move is still the latest one if keyframe_id not in timeline_data["pending_keyframe_moves"]: return # Move was cancelled if timeline_data["pending_keyframe_moves"][keyframe_id] != new_frame: return # A newer move superseded this one # Remove from pending del timeline_data["pending_keyframe_moves"][keyframe_id] if keyframe_id in timeline_data["keyframe_move_timers"]: del timeline_data["keyframe_move_timers"][keyframe_id] # Now execute the actual move (keep it in the lock so we don't delete it while moving) if keyframe_id not in timeline_data["keyframes"]: # double check return keyframe_data = timeline_data["keyframes"][keyframe_id] if not keyframe_data: return # if the frame did not move, don't do anything if keyframe_data["frame"] == new_frame: return track_id = keyframe_data["track_id"] constraint_type = timeline_data["tracks"][track_id]["name"] cur_frame = keyframe_data["frame"] # Remove constraint at old frame remove_constraint_callback( keyframe_id, constraint_type, (cur_frame, cur_frame), verbose=False, ) # Add constraint at new frame add_constraint_callback( keyframe_id, constraint_type, (new_frame, new_frame), verbose=False, ) # update our data keyframe_data["frame"] = new_frame # Schedule path update only after user stops dragging (no move for 300ms). 
if constraint_type == "2D Root": _schedule_dense_path_after_release(session) @client.timeline.on_keyframe_delete def handle_keyframe_delete(keyframe_id: str): """Called when a keyframe is deleted.""" if not demo.client_active(client_id): return session = demo.client_sessions[client_id] with session.timeline_data["keyframe_update_lock"]: if keyframe_id not in session.timeline_data["keyframes"]: return keyframe_data = session.timeline_data["keyframes"][keyframe_id] track_id = keyframe_data["track_id"] constraint_type = session.timeline_data["tracks"][track_id]["name"] cur_frame = keyframe_data["frame"] remove_constraint_callback( keyframe_id, constraint_type, (cur_frame, cur_frame), verbose=False, ) del session.timeline_data["keyframes"][keyframe_id] if constraint_type == "2D Root" and session.constraints["2D Root"].dense_path: motion = list(session.motions.values())[0] _update_dense_path(motion, session) @client.timeline.on_interval_move def handle_interval_move(interval_id: str, new_start: int, new_end: int): """Called when an interval is moved or resized.""" # print(f"Interval moved: {interval_id} to {new_start}-{new_end}") if not demo.client_active(client_id): return session = demo.client_sessions[client_id] # Cancel any pending timer for this interval # We share the same lock for keyframe and interval moves assuming the user can't move both at the same time timeline_data = session.timeline_data with timeline_data["keyframe_update_lock"]: if interval_id in timeline_data["keyframe_move_timers"]: timeline_data["keyframe_move_timers"][interval_id].cancel() # Store the latest target frame new_interval = (new_start, new_end) timeline_data["pending_keyframe_moves"][interval_id] = new_interval # Create a new timer to execute the actual move after a delay # This debounces rapid movements - only execute when user stops moving timer = threading.Timer( 0.5, # 100ms delay - adding interval is much slower than moving a keyframe _execute_interval_move, args=(client_id, interval_id, new_interval, session), ) timeline_data["keyframe_move_timers"][interval_id] = timer timer.start() def _execute_interval_move( client_id: int, interval_id: str, new_interval: tuple[int, int], session: ClientSession, ): """Actually execute the interval move operation (called after debounce delay).""" timeline_data = session.timeline_data with timeline_data["keyframe_update_lock"]: # Check if this move is still the latest one if interval_id not in timeline_data["pending_keyframe_moves"]: return # Move was cancelled if timeline_data["pending_keyframe_moves"][interval_id] != new_interval: return # A newer move superseded this one # Remove from pending del timeline_data["pending_keyframe_moves"][interval_id] if interval_id in timeline_data["keyframe_move_timers"]: del timeline_data["keyframe_move_timers"][interval_id] # Now execute the actual move if interval_id not in timeline_data["intervals"]: return interval_data = timeline_data["intervals"][interval_id] if not interval_data: return # if the interval did not move, don't do anything if ( interval_data["start_frame_idx"] == new_interval[0] and interval_data["end_frame_idx"] == new_interval[1] ): return track_id = interval_data["track_id"] constraint_type = timeline_data["tracks"][track_id]["name"] cur_range = ( interval_data["start_frame_idx"], interval_data["end_frame_idx"], ) # Remove constraint at old frame remove_constraint_callback( interval_id, constraint_type, cur_range, verbose=False, ) # Add constraint at new frame add_constraint_callback( interval_id, 
constraint_type, new_interval, verbose=False, ) # update our data interval_data["start_frame_idx"] = new_interval[0] interval_data["end_frame_idx"] = new_interval[1] # Schedule path update only after user stops dragging (no move for 300ms). if constraint_type == "2D Root": _schedule_dense_path_after_release(session) @client.timeline.on_interval_delete def handle_interval_delete(interval_id: str): """Called when an interval is deleted.""" if not demo.client_active(client_id): return session = demo.client_sessions[client_id] with session.timeline_data["keyframe_update_lock"]: if interval_id not in session.timeline_data["intervals"]: return interval_data = session.timeline_data["intervals"][interval_id] track_id = interval_data["track_id"] constraint_type = session.timeline_data["tracks"][track_id]["name"] remove_constraint_callback( interval_id, constraint_type, ( interval_data["start_frame_idx"], interval_data["end_frame_idx"], ), verbose=False, ) del session.timeline_data["intervals"][interval_id] if constraint_type == "2D Root" and session.constraints["2D Root"].dense_path: motion = list(session.motions.values())[0] _update_dense_path(motion, session) @gui_snap_to_constraint_button.on_click def _(event: viser.GuiEvent) -> None: event_client = event.client session = get_active_session(event_client) if session is None: return target_character_motion = list(session.motions.values())[0] frame_idx = session.frame_idx if frame_idx >= target_character_motion.length: # frame idx larger than the motion, could not snap return for constraint_name in ["Full-Body", "End-Effectors"]: if ( constraint_name in session.constraints and frame_idx in session.constraints[constraint_name].keyframes ): pos = session.constraints[constraint_name].keyframes[frame_idx]["joints_pos"] rot = session.constraints[constraint_name].keyframes[frame_idx]["joints_rot"] # update the full joints_pos of the character to match the constraints target_character_motion.update_pose_at_frame( frame_idx, joints_pos=pos, joints_rot=rot, ) target_character_motion.set_frame(frame_idx) return # motion already fully changed if "2D Root" in session.constraints and frame_idx in session.constraints["2D Root"].keyframes: # update only the root position new_root_pos = session.constraints["2D Root"].keyframes[frame_idx] old_root_pos = target_character_motion.get_projected_root_pos(frame_idx) root_diff = new_root_pos - old_root_pos root_diff[1] = 0.0 # don't change height new_joints_pos = ( target_character_motion.joints_pos[frame_idx] + to_torch( root_diff, device=target_character_motion.joints_pos.device, dtype=target_character_motion.joints_pos.dtype, )[None] ) rot = target_character_motion.joints_rot[frame_idx] target_character_motion.update_pose_at_frame( frame_idx, joints_pos=new_joints_pos, joints_rot=rot, ) target_character_motion.set_frame(frame_idx) @gui_clear_all_constraints_button.on_click def _(event: viser.GuiEvent) -> None: event_client = event.client session = get_active_session(event_client) if session is None: return with session.timeline_data["keyframe_update_lock"]: # use the lock here to wait for any constraint updates to finish for constraint in list(session.constraints.values()): constraint.clear() client.timeline.clear_keyframes() client.timeline.clear_intervals() if gui_dense_path_checkbox.value: gui_dense_path_checkbox.value = False if "2D Root" in session.constraints: session.constraints["2D Root"].set_dense_path(False) # generation callback @gui_generate_button.on_click def _(event: viser.GuiEvent) -> None: event_client 
= event.client session = get_active_session(event_client) if session is None: return generating_notif = event_client.add_notification( title="Generating motion...", body="Generating motions for the given prompt!", loading=True, with_close_button=False, ) gui_generate_button.disabled = True client.timeline.disable_constraints() num_samples = gui_num_samples_slider.value timeline = session.client.timeline # sort prompts by start frame so texts stay aligned with their frame ranges: prompt_values = sorted([x for x in timeline._prompts.values()], key=lambda x: x.start_frame) texts = [x.text for x in prompt_values] num_frames = compute_prompt_num_frames(prompt_values) # compute the total duration total_nb_frames = sum(num_frames) total_duration = total_nb_frames / session.model_fps # update just in case set_new_duration(client_id, total_duration) transitions_parameters = { "num_transition_frames": gui_num_transition_frames_slider.value, } # G1: postprocessing is disabled (does not work well for this model). postprocess_parameters = { "post_processing": (False if "g1" in session.model_name else gui_postprocess_checkbox.value), "root_margin": gui_root_margin.value, } try: demo.generate( event_client, texts, num_frames, num_samples, gui_seed.value, gui_diffusion_steps_slider.value, cfg_weight=[ gui_cfg_text_weight_slider.value, gui_cfg_constraint_weight_slider.value, ], cfg_type="separated" if gui_cfg_checkbox.value else "nocfg", postprocess_parameters=postprocess_parameters, transitions_parameters=transitions_parameters, real_robot_rotations=gui_real_robot_rotations_checkbox.value, ) session.max_frame_idx = int(session.cur_duration * session.model_fps) - 1 if session.frame_idx > session.max_frame_idx: session.frame_idx = session.max_frame_idx if num_samples > 1: # add mesh selector to choose character to commit def commit_motion(event: viser.GuiEvent) -> None: target = event.target commit_name = target.name.split("/")[1] # e.g. /character0/simple_skinned print(f"Committing motion for character: {commit_name}") # delete non-selected motions new_motion_kwargs = None for character_name, motion in session.motions.items(): if character_name == commit_name: new_motion_kwargs = { "skeleton": session.skeleton, "joints_rot": motion.joints_rot, "foot_contacts": motion.foot_contacts, } root_x_offset = motion.joints_pos[0, session.skeleton.root_idx, 0] new_joints_pos = motion.joints_pos.clone() new_joints_pos[..., 0] -= root_x_offset new_motion_kwargs["joints_pos"] = new_joints_pos break # clear and re-add the selected motion demo.clear_motions(event_client.client_id) demo.add_character_motion(event_client, **new_motion_kwargs) gui_edit_constraint_button.disabled = False gui_generate_button.disabled = False gui_snap_to_constraint_button.disabled = False client.timeline.enable_constraints() gui_generate_button.label = "Generate" gui_save_example_button.disabled = False gui_save_motion_button.disabled = False gui_download_button.disabled = False gui_save_constraints_button.disabled = False gui_load_example_button.disabled = False for motion in session.motions.values(): char = motion.character character_name = char.name # e.g. "character0" if char.skinned_mesh is not None: char.skinned_mesh.on_click(commit_motion) elif char.g1_mesh_rig is not None: # Register click on every part so any part can be clicked, # and use highlight_group so the whole robot highlights together.
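# Multi-sample commit flow (descriptive): while several samples are displayed,
# generation/saving stays disabled and every character mesh is a click target;
# commit_motion keeps only the clicked sample and subtracts its initial root
# x-offset so the committed motion starts at x = 0.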
for handle in char.g1_mesh_rig.mesh_handles: handle.on_click(commit_motion, highlight_group=character_name) gui_edit_constraint_button.disabled = True gui_generate_button.disabled = True gui_snap_to_constraint_button.disabled = True gui_generate_button.label = "Choose Sample Before Generating" gui_save_example_button.disabled = True gui_save_motion_button.disabled = True gui_download_button.disabled = True gui_save_constraints_button.disabled = True gui_load_example_button.disabled = True else: gui_edit_constraint_button.disabled = False gui_generate_button.disabled = False gui_snap_to_constraint_button.disabled = False client.timeline.enable_constraints() generating_notif.title = "Motion generation finished!" generating_notif.body = "Motions have been generated successfully for the given prompt." if num_samples > 1: generating_notif.body += " Now choose which sample to commit." generating_notif.loading = False generating_notif.with_close_button = True generating_notif.auto_close_seconds = 5.0 generating_notif.color = "green" # put the motion at zero demo.set_frame(client_id, 0) except Exception as e: import traceback traceback.print_exc() print(f"Error during generation for client {event_client.client_id}: {e}") # Re-enable buttons and notify the user if event_client.client_id in demo.client_sessions: session = demo.client_sessions[event_client.client_id] gui_generate_button.disabled = False gui_load_example_button.disabled = False gui_save_example_button.disabled = False gui_save_motion_button.disabled = False gui_download_button.disabled = False try: event_client.add_notification( title="Generation failed!", body=f"Error: {str(e)}", auto_close_seconds=5.0, color="red", ) except Exception: pass demo.check_cuda_health() # # Visualization settings # with tab_group.add_tab("Visualize", viser.Icon.EYE): with client.gui.add_folder("Playback", expand_by_default=True): gui_model_fps = client.gui.add_number("Model FPS", initial_value=model_fps, disabled=True) gui_playback_speed_buttons = client.gui.add_button_group( "Playback Speed", options=[ "0.5x", "1x", "2x", ], ) gui_playback_speed_buttons.value = "1x" @client.timeline.on_frame_change def handle_timeline_frame_change(new_frame_idx: int): """Update the frame when the user clicks on the timeline.""" demo.set_frame(client_id, new_frame_idx, update_timeline=False) session = demo.client_sessions.get(client_id) if session is not None: if session.edit_mode and session.motions: motion = list(session.motions.values())[0] snapshot_frame_idx = min(session.frame_idx, motion.length - 1) ensure_edit_snapshot(session, motion, snapshot_frame_idx) update_snap_to_constraint_button(session) @client.timeline.on_prompt_add async def _on_add( prompt_id: str, start_frame: int, end_frame: int, text: str, color: tuple[int, int, int] | None, ) -> None: update_duration_auto() @client.timeline.on_prompt_update async def _on_update(prompt_id: str, new_text: str) -> None: update_duration_auto() @client.timeline.on_prompt_resize async def _on_resize(prompt_id: str, new_start: int, new_end: int) -> None: update_duration_auto() @client.timeline.on_prompt_move async def _on_move(prompt_id: str, new_start: int, new_end: int) -> None: update_duration_auto() @client.timeline.on_prompt_delete async def _on_delete(prompt_id: str) -> None: update_duration_auto() def play_pause_button_callback(session: ClientSession): session.playing = not session.playing def next_frame_callback(session: ClientSession): if session.frame_idx < session.max_frame_idx: session.frame_idx += 1 if 
session.frame_idx == session.max_frame_idx: pass demo.set_frame(client_id, session.frame_idx) def prev_frame_callback(session: ClientSession): if session.frame_idx > 0: session.frame_idx -= 1 if session.frame_idx == 0: pass demo.set_frame(client_id, session.frame_idx) @gui_playback_speed_buttons.on_click def _(_) -> None: if not demo.client_active(client_id): return speed_map = { "0.5x": 0.5, "1x": 1.0, "2x": 2.0, } session = demo.client_sessions[client_id] session.playback_speed = speed_map[gui_playback_speed_buttons.value] with client.gui.add_folder("Body options", expand_by_default=True): gui_viz_skinned_mesh_checkbox = client.gui.add_checkbox("Show Mesh", initial_value=True) gui_viz_skinned_mesh_opacity_slider = client.gui.add_slider( "Mesh Opacity", min=0.0, max=1.0, step=0.01, initial_value=1.0 ) gui_viz_skeleton_checkbox = client.gui.add_checkbox("Show Skeleton", initial_value=False) gui_viz_foot_contacts_checkbox = client.gui.add_checkbox("Show Foot Contacts", initial_value=False) gui_viz_foot_contacts_checkbox.visible = gui_viz_skeleton_checkbox.value with client.gui.add_folder("Camera options", expand_by_default=True): gui_camera_fov_slider = client.gui.add_slider( "Camera FOV (deg)", min=30.0, max=90.0, step=1.0, initial_value=45.0, ) client.camera.fov = np.deg2rad(gui_camera_fov_slider.value) with client.gui.add_folder("Interface options", expand_by_default=True): gui_show_timeline_checkbox = client.gui.add_checkbox( "Show Timeline", initial_value=True, ) gui_show_constraint_tracks_checkbox = client.gui.add_checkbox( "Show Constraint tracks", initial_value=True, ) gui_show_constraint_labels_checkbox = client.gui.add_checkbox( "Show Constraint labels", initial_value=True, ) gui_show_starting_direction_checkbox = client.gui.add_checkbox( "Show Starting Direction", initial_value=True, ) gui_dark_mode_checkbox = client.gui.add_checkbox( "Dark Mode", initial_value=False, # Default to light mode ) gui_show_constraint_tracks_checkbox.visible = gui_show_timeline_checkbox.value demo.set_start_direction_visible(client_id, gui_show_starting_direction_checkbox.value) @gui_dark_mode_checkbox.on_update def _(_): # Apply the theme using configure_theme (pass uuid so titlebar toggle stays) demo.configure_theme( client, gui_dark_mode_checkbox.value, titlebar_dark_mode_checkbox_uuid=gui_dark_mode_checkbox.uuid, ) session = demo.client_sessions[client.client_id] for motion in session.motions.values(): motion.character.change_theme(gui_dark_mode_checkbox.value) # Show dark mode toggle in titlebar (right of Github), hide sidebar checkbox demo.configure_theme( client, gui_dark_mode_checkbox.value, titlebar_dark_mode_checkbox_uuid=gui_dark_mode_checkbox.uuid, ) gui_dark_mode_checkbox.visible = False @gui_show_constraint_labels_checkbox.on_update def _(_): if not demo.client_active(client_id): return session = demo.client_sessions[client_id] for constraint in session.constraints.values(): constraint.set_label_visibility(gui_show_constraint_labels_checkbox.value) @gui_show_timeline_checkbox.on_update def _(_): if not demo.client_active(client_id): return session = demo.client_sessions[client_id] session.client.timeline.set_visible(gui_show_timeline_checkbox.value) gui_show_constraint_tracks_checkbox.visible = gui_show_timeline_checkbox.value if gui_show_timeline_checkbox.value: demo.set_constraint_tracks_visible(session, gui_show_constraint_tracks_checkbox.value) @gui_show_constraint_tracks_checkbox.on_update def _(_): if not demo.client_active(client_id): return session = 
demo.client_sessions[client_id] demo.set_constraint_tracks_visible(session, gui_show_constraint_tracks_checkbox.value) @gui_show_starting_direction_checkbox.on_update def _(_): if not demo.client_active(client_id): return demo.set_start_direction_visible(client_id, gui_show_starting_direction_checkbox.value) @gui_viz_skeleton_checkbox.on_update def _(_) -> None: if not demo.client_active(client_id): return session = demo.client_sessions[client_id] gui_viz_foot_contacts_checkbox.visible = gui_viz_skeleton_checkbox.value if not gui_viz_skeleton_checkbox.value: gui_viz_foot_contacts_checkbox.value = False for motion in session.motions.values(): motion.character.set_skeleton_visibility(gui_viz_skeleton_checkbox.value) @gui_viz_foot_contacts_checkbox.on_update def _(_) -> None: if not demo.client_active(client_id): return session = demo.client_sessions[client_id] for motion in session.motions.values(): motion.character.set_show_foot_contacts( gui_viz_foot_contacts_checkbox.value, frame_idx=motion.cur_frame_idx ) @gui_viz_skinned_mesh_checkbox.on_update def _(_) -> None: if not demo.client_active(client_id): return session = demo.client_sessions[client_id] for motion in session.motions.values(): motion.character.set_skinned_mesh_visibility(gui_viz_skinned_mesh_checkbox.value) @gui_viz_skinned_mesh_opacity_slider.on_update def _(_) -> None: if not demo.client_active(client_id): return session = demo.client_sessions[client_id] for motion in session.motions.values(): motion.character.set_skinned_mesh_opacity(gui_viz_skinned_mesh_opacity_slider.value) @gui_camera_fov_slider.on_update def _(_) -> None: if not demo.client_active(client_id): return client.camera.fov = np.deg2rad(gui_camera_fov_slider.value) # # Instructions tab # with tab_group.add_tab("Instructions", viser.Icon.INFO_CIRCLE): client.gui.add_markdown(DEMO_UI_INSTRUCTIONS_TAB_MD) # # Keyboard events # space_pressed = [False] @client.scene.on_keyboard_event("keydown", debounce_ms=100) def handle_key(event: viser.KeyboardEvent) -> None: # Check if client session still exists if client_id not in demo.client_sessions: return session = demo.client_sessions[client_id] if event.event_type == "keyup": if event.key == " ": space_pressed[0] = False return # Space bar: only toggle on FIRST press if event.key == " ": if not space_pressed[0]: space_pressed[0] = True play_pause_button_callback(session) return # Handle arrow keys: frame navigation (fast OS repeat, rate-limited by the 100ms debounce).
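# Unlike the space bar, arrow keys have no first-press latch: holding a key
# lets OS key repeat step frames continuously, and the decorator's
# debounce_ms caps how often this handler fires.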
elif event.key == "ArrowLeft": prev_frame_callback(session) elif event.key == "ArrowRight": next_frame_callback(session) gui_elements = GuiElements( gui_play_pause_button=gui_play_pause_button, gui_next_frame_button=gui_next_frame_button, gui_prev_frame_button=gui_prev_frame_button, gui_generate_button=gui_generate_button, gui_model_fps=gui_model_fps, gui_timeline=gui_timeline, gui_viz_skeleton_checkbox=gui_viz_skeleton_checkbox, gui_viz_foot_contacts_checkbox=gui_viz_foot_contacts_checkbox, gui_viz_skinned_mesh_checkbox=gui_viz_skinned_mesh_checkbox, gui_viz_skinned_mesh_opacity_slider=gui_viz_skinned_mesh_opacity_slider, gui_camera_fov_slider=gui_camera_fov_slider, gui_duration_slider=gui_duration_slider, gui_num_samples_slider=gui_num_samples_slider, gui_cfg_checkbox=gui_cfg_checkbox, gui_cfg_text_weight_slider=gui_cfg_text_weight_slider, gui_cfg_constraint_weight_slider=gui_cfg_constraint_weight_slider, gui_diffusion_steps_slider=gui_diffusion_steps_slider, gui_seed=gui_seed, gui_postprocess_checkbox=gui_postprocess_checkbox, gui_root_margin=gui_root_margin, gui_real_robot_rotations_checkbox=gui_real_robot_rotations_checkbox, gui_dark_mode_checkbox=gui_dark_mode_checkbox, gui_use_soma_layer_checkbox=gui_use_soma_layer_checkbox, ) return ( gui_elements, timeline_tracks, example_dict, gui_examples_dropdown, gui_save_example_path_text, gui_model_selector, ) ================================================ FILE: kimodo/exports/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Export utilities: MuJoCo, BVH, SMPLX/AMASS, and motion I/O helpers.""" from .bvh import bvh_to_kimodo_motion, motion_to_bvh_bytes, read_bvh_frame_time_seconds, save_motion_bvh from .motion_convert_lib import convert_motion_files from .motion_formats import ( infer_npz_kind, infer_source_format_from_path, infer_target_format_from_path, resolve_source_fps, ) from .motion_io import ( KIMODO_CONVERT_TARGET_FPS, amass_npz_to_bytes, complete_motion_dict, g1_csv_to_bytes, kimodo_npz_to_bytes, load_amass_npz, load_g1_csv, load_kimodo_npz, load_kimodo_npz_as_torch, load_motion_file, motion_dict_to_numpy, save_kimodo_npz, save_kimodo_npz_at_target_fps, ) from .mujoco import MujocoQposConverter, apply_g1_real_robot_projection from .smplx import ( AMASSConverter, amass_npz_to_kimodo_motion, get_amass_parameters, kimodo_y_up_to_amass_coord_rotation_matrix, ) __all__ = [ "AMASSConverter", "KIMODO_CONVERT_TARGET_FPS", "MujocoQposConverter", "amass_npz_to_bytes", "amass_npz_to_kimodo_motion", "apply_g1_real_robot_projection", "bvh_to_kimodo_motion", "complete_motion_dict", "convert_motion_files", "g1_csv_to_bytes", "get_amass_parameters", "infer_npz_kind", "infer_source_format_from_path", "infer_target_format_from_path", "kimodo_npz_to_bytes", "kimodo_y_up_to_amass_coord_rotation_matrix", "load_amass_npz", "load_g1_csv", "load_kimodo_npz", "load_kimodo_npz_as_torch", "load_motion_file", "motion_dict_to_numpy", "motion_to_bvh_bytes", "read_bvh_frame_time_seconds", "resolve_source_fps", "save_kimodo_npz", "save_kimodo_npz_at_target_fps", "save_motion_bvh", ] ================================================ FILE: kimodo/exports/bvh.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Export utilities for converting internal motion representations into common file formats. This module is intended to hold lightweight serialization / export helpers that can be reused outside of interactive demos. """ import os import tempfile from pathlib import Path from typing import Tuple, Union import numpy as np import torch from kimodo.geometry import matrix_to_quaternion as _matrix_to_quaternion def _strip_end_site_blocks(bvh_text: str) -> str: """Remove all 'End Site { ... }' blocks from BVH text so output matches original format. bvhio adds an End Site for every leaf joint when writing; we do not set EndSite on joints, so we post-process the string to remove these blocks for Blender/original compatibility. """ lines = bvh_text.splitlines(keepends=True) result = [] i = 0 while i < len(lines): line = lines[i] if "End Site" in line: # Skip this line and the following block { ... }; brace-count to find closing } i += 1 if i < len(lines) and "{" in lines[i]: i += 1 depth = 1 while i < len(lines) and depth > 0: if "{" in lines[i]: depth += 1 if "}" in lines[i]: depth -= 1 i += 1 continue result.append(line) i += 1 return "".join(result) def _coerce_batch(name: str, x: torch.Tensor, *, expected_ndim: int) -> torch.Tensor: """Coerce (T, ...) or (1, T, ...) into (T, ...).""" if x.ndim == expected_ndim: return x if x.ndim == expected_ndim + 1: if int(x.shape[0]) != 1: raise ValueError( f"{name} has batch dimension B={int(x.shape[0])}, but BVH export " "only supports a single clip (B==1)." ) return x[0] raise ValueError(f"{name} must have shape (T, ...) or (1, T, ...); got {tuple(x.shape)}") def motion_to_bvh( local_rot_mats: torch.Tensor, root_positions: torch.Tensor, *, skeleton, fps: float, standard_tpose: bool = False, ) -> str: """Convert local rotations and root positions to BVH format; return UTF-8 string. Args: local_rot_mats: (T, J, 3, 3) or (1, T, J, 3, 3) local rotation matrices. root_positions: (T, 3) or (1, T, 3) root joint positions (e.g. from posed joints). skeleton: Skeleton with bone_order_names, bvh_neutral_joints, etc. fps: Frames per second for the motion. standard_tpose: If True, export with the rest pose being the standard T-pose rather than the rest pose consistent with the BONES-SEED dataset. Notes: BVH is plain-text. Root is named "Root" with ZYX rotation order; leaf joints have no End Site block. """ try: import bvhio # type: ignore[import-not-found] import glm # type: ignore[import-not-found] from SpatialTransform import Pose # type: ignore[import-not-found] except Exception as e: # pragma: no cover raise ImportError( "BVH export requires `bvhio` (and its deps `PyGLM` + `SpatialTransform`). " "Install with: `pip install bvhio`." 
) from e local_rot_mats = local_rot_mats.detach() root_positions = root_positions.detach() # SOMA: accept either somaskel30 (convert to 77) or somaskel77 (use as-is) if skeleton.name == "somaskel30": local_rot_mats = skeleton.to_SOMASkeleton77(local_rot_mats) skeleton = skeleton.somaskel77 if standard_tpose: neutral = skeleton.neutral_joints.detach().cpu().numpy() else: # transform local rots to the original rest pose consistent with the BONES-SEED dataset local_rot_mats, _ = skeleton.from_standard_tpose(local_rot_mats) neutral = skeleton.bvh_neutral_joints.detach().cpu().numpy() joint_names = list(skeleton.bone_order_names) parents = skeleton.joint_parents.detach().cpu().numpy().astype(int) root_idx = int(skeleton.root_idx) local_rot_mats = _coerce_batch("local_rot_mats", local_rot_mats, expected_ndim=4) T, J = local_rot_mats.shape[:2] q_wxyz = _matrix_to_quaternion(local_rot_mats).detach().cpu().numpy() # [T, J, 4] root_xyz = _coerce_batch("root_positions", root_positions, expected_ndim=2) root_xyz = root_xyz.cpu().numpy() # [T, 3] # Build BVH hierarchy: Root (wrapper at origin) -> Hips (pelvis with offset in meters) -> ... # Offsets are in meters to match the original format. children: dict[int, list[int]] = {i: [] for i in range(J)} for i, p in enumerate(parents): if p >= 0: children[int(p)].append(int(i)) _ROOT_CHANNELS = [ "Xposition", "Yposition", "Zposition", "Zrotation", "Yrotation", "Xrotation", ] _JOINT_CHANNELS = ["Zrotation", "Yrotation", "Xrotation"] # Scale from meters to centimeters (match original SEED data BVH scale). neutral = neutral * 100 root_xyz = root_xyz * 100 # Hips offset from Root: use skeleton neutral; if root is at origin (zeros), use a # nominal pelvis height so the hierarchy is non-degenerate in Blender. hips_offset = neutral[root_idx] if (hips_offset == 0).all(): hips_offset = np.array([0.0, 100.0, 0.0], dtype=neutral.dtype) # 1 m in cm def _make_joint(i: int) -> "bvhio.BvhJoint": name = joint_names[i] j = bvhio.BvhJoint(name, offset=glm.vec3(0, 0, 0)) if i == root_idx: # Hips: offset from Root (origin) in cm off = hips_offset j.Offset = glm.vec3(float(off[0]), float(off[1]), float(off[2])) j.Channels = _ROOT_CHANNELS.copy() else: p = int(parents[i]) off = neutral[i] - neutral[p] j.Offset = glm.vec3(float(off[0]), float(off[1]), float(off[2])) j.Channels = _JOINT_CHANNELS.copy() for c in children[i]: j.Children.append(_make_joint(c)) return j # Wrapper Root at origin; single child is Hips (skeleton root). root_wrapper = bvhio.BvhJoint("Root", offset=glm.vec3(0.0, 0.0, 0.0)) root_wrapper.Channels = _ROOT_CHANNELS.copy() root_wrapper.Children.append(_make_joint(root_idx)) root_joint = root_wrapper # Populate keyframes: Root = identity/zero, Hips = root motion, others = local rotation. 
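# Note on serialization: bvhio writes each Pose through the Channels declared
# above, so the "Root" wrapper and the Hips emit position + ZYX Euler values
# while every other joint emits ZYX Euler values only; the zero positions set
# for non-root joints below are never written out.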
bvh_layout = root_joint.layout() name_to_id = {n: idx for idx, n in enumerate(joint_names)} ordered_joint_ids = [] for bj, _, _ in bvh_layout: if bj.Name == "Root": ordered_joint_ids.append(None) else: ordered_joint_ids.append(name_to_id[bj.Name]) bvh_joints = [bj for bj, _, _ in bvh_layout] for bj in bvh_joints: bj.Keyframes = [None] * T # type: ignore[list-item] identity_quat = glm.quat(1.0, 0.0, 0.0, 0.0) zero_vec = glm.vec3(0.0, 0.0, 0.0) for t in range(T): for bj, jid in zip(bvh_joints, ordered_joint_ids): if jid is None: position = zero_vec rotation = identity_quat elif jid == root_idx: pos = root_xyz[t] position = glm.vec3(float(pos[0]), float(pos[1]), float(pos[2])) qw, qx, qy, qz = q_wxyz[t, jid] rotation = glm.quat(float(qw), float(qx), float(qy), float(qz)) else: position = zero_vec qw, qx, qy, qz = q_wxyz[t, jid] rotation = glm.quat(float(qw), float(qx), float(qy), float(qz)) bj.Keyframes[t] = Pose(position, rotation) # type: ignore[index] container = bvhio.BvhContainer(root_joint, frameCount=T, frameTime=1.0 / float(fps)) with tempfile.NamedTemporaryFile(mode="w", suffix=".bvh", delete=False, encoding="utf-8") as f: tmp_path = f.name try: bvhio.writeBvh(tmp_path, container, percision=6) bvh_text = Path(tmp_path).read_text(encoding="utf-8") return _strip_end_site_blocks(bvh_text) finally: try: os.remove(tmp_path) except Exception: pass def motion_to_bvh_bytes( local_rot_mats: torch.Tensor, root_positions: torch.Tensor, *, skeleton, fps: float, standard_tpose: bool = False, ) -> bytes: """Convert local rotations and root positions to BVH bytes (UTF-8). Convenience wrapper around :func:`motion_to_bvh`. """ return motion_to_bvh( local_rot_mats, root_positions, skeleton=skeleton, fps=fps, standard_tpose=standard_tpose, ).encode("utf-8") def save_motion_bvh( path: Union[str, Path], local_rot_mats: torch.Tensor, root_positions: torch.Tensor, *, skeleton, fps: float, standard_tpose: bool = False, ) -> None: """Write local rotations and root positions to a BVH file at the given path.""" Path(path).write_text( motion_to_bvh(local_rot_mats, root_positions, skeleton=skeleton, fps=fps, standard_tpose=standard_tpose), encoding="utf-8", ) def read_bvh_frame_time_seconds(path: Union[str, Path]) -> float: """Read ``Frame Time`` from a BVH file (seconds per frame).""" with open(path, encoding="utf-8") as f: for line in f: if "Frame Time:" in line: parts = line.split() return float(parts[-1]) raise ValueError(f"Could not find 'Frame Time:' in {path}") def bvh_to_kimodo_motion( path: Union[str, Path], skeleton=None, *, standard_tpose: bool = False, ) -> Tuple: """Load a Kimodo-style SOMA BVH into a Kimodo motion dict. Expects the same hierarchy as :func:`save_motion_bvh` (``Root`` wrapper + SOMA77 joints). The frame rate is always read from the BVH ``Frame Time`` header. Callers that need a different playback rate should resample the returned motion dict (see :func:`~kimodo.exports.motion_io.resample_motion_dict_to_kimodo_fps`). Returns: ``(motion_dict, source_fps)`` where ``source_fps`` is the native BVH frame rate read from the file header. 
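Example (illustrative; the path is a placeholder)::

    motion, src_fps = bvh_to_kimodo_motion("clip.bvh")  # SOMA77 skeleton by default
    print(motion["posed_joints"].shape)  # (T, 77, 3), at the native BVH rate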
""" from kimodo.exports.motion_io import complete_motion_dict from kimodo.skeleton.bvh import parse_bvh_motion from kimodo.skeleton.registry import build_skeleton if skeleton is None: skeleton = build_skeleton(77) device = skeleton.neutral_joints.device local_rot_mats, root_trans, bvh_fps = parse_bvh_motion(str(path)) local_rot_mats = local_rot_mats.to(device=device) root_trans = root_trans.to(device=device) if int(local_rot_mats.shape[1]) != int(skeleton.nbjoints): raise ValueError( f"BVH has {local_rot_mats.shape[1]} joints but skeleton has {skeleton.nbjoints}; " "use a Kimodo-exported SOMA BVH or matching skeleton." ) if not standard_tpose: local_rot_mats, _ = skeleton.to_standard_tpose(local_rot_mats) return complete_motion_dict(local_rot_mats, root_trans, skeleton, float(bvh_fps)), bvh_fps ================================================ FILE: kimodo/exports/motion_convert_lib.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Library API for converting between Kimodo NPZ, AMASS NPZ, SOMA BVH, and G1 MuJoCo CSV.""" from __future__ import annotations import warnings import numpy as np from kimodo.exports.bvh import bvh_to_kimodo_motion, save_motion_bvh from kimodo.exports.motion_formats import ( infer_source_format_from_path, infer_target_format_from_path, resolve_source_fps, ) from kimodo.exports.motion_io import ( load_amass_npz, load_g1_csv, load_kimodo_npz_as_torch, save_kimodo_npz_at_target_fps, ) from kimodo.exports.mujoco import MujocoQposConverter from kimodo.exports.smplx import AMASSConverter from kimodo.skeleton.registry import build_skeleton def convert_motion_files( input_path: str, output_path: str, *, from_fmt: str | None = None, to_fmt: str | None = None, source_fps: float | None = None, z_up: bool = True, mujoco_rest_zero: bool = False, bvh_standard_tpose: bool = False, ) -> None: """Convert a motion file between Kimodo-supported formats. Supported pairs (hub-and-spoke through Kimodo NPZ): - amass <-> kimodo - soma-bvh <-> kimodo - g1-csv <-> kimodo Args: input_path: Source file (``.npz``, ``.bvh``, or ``.csv``). output_path: Destination file. from_fmt: Source format; inferred from extension/contents when ``None``. to_fmt: Target format; inferred from extension when ``None``. source_fps: Source motion frame rate (Hz). If provided, trusted as-is. If ``None``, auto-detected from BVH ``Frame Time``, AMASS ``mocap_frame_rate``, or default 30. z_up: For AMASS conversions, apply the Z-up <-> Kimodo Y-up transform. mujoco_rest_zero: For G1 CSV, joint angles relative to MuJoCo rest pose. bvh_standard_tpose: If input or output is BVH: the BVH file uses the standard T-pose as its rest pose instead of the BONES-SEED rest pose. 
""" from_fmt = from_fmt or infer_source_format_from_path(input_path) to_fmt = to_fmt or infer_target_format_from_path(output_path, from_fmt) _validate_output_extension(to_fmt, output_path) pair = (from_fmt, to_fmt) if pair == ("amass", "kimodo"): sk = build_skeleton(22) effective_source = source_fps if effective_source is None: with np.load(input_path, allow_pickle=True) as z: effective_source = float(z["mocap_frame_rate"]) if "mocap_frame_rate" in z.files else 30.0 motion = load_amass_npz(input_path, source_fps=effective_source, z_up=z_up) save_kimodo_npz_at_target_fps(motion, sk, effective_source, output_path) return if pair == ("kimodo", "amass"): data, J = load_kimodo_npz_as_torch(input_path, ensure_complete=False) if J != 22: raise ValueError(f"Kimodo→AMASS requires 22 joints (SMPL-X); this file has J={J}.") sk = build_skeleton(22) effective_source = resolve_source_fps(source_fps, "kimodo", input_path, None) converter = AMASSConverter(fps=effective_source, skeleton=sk) converter.convert_save_npz(data, output_path, z_up=z_up) return if pair == ("soma-bvh", "kimodo"): sk = build_skeleton(77) motion, bvh_fps = bvh_to_kimodo_motion(input_path, skeleton=sk, standard_tpose=bvh_standard_tpose) effective_source = source_fps if source_fps is not None else bvh_fps save_kimodo_npz_at_target_fps(motion, sk, effective_source, output_path) return if pair == ("kimodo", "soma-bvh"): data, J = load_kimodo_npz_as_torch(input_path, ensure_complete=False) if J == 30: warnings.warn( f"Input has 30 joints (somaskel30); expanding to somaskel77 for BVH export.", UserWarning, stacklevel=2, ) sk = build_skeleton(30) elif J == 77: sk = build_skeleton(77) else: raise ValueError(f"Kimodo→BVH requires a SOMA skeleton (30 or 77 joints); this file has J={J}.") effective_source = resolve_source_fps(source_fps, "kimodo", input_path, None) save_motion_bvh( output_path, data["local_rot_mats"], data["root_positions"], skeleton=sk, fps=effective_source, standard_tpose=bvh_standard_tpose, ) return if pair == ("g1-csv", "kimodo"): sk = build_skeleton(34) effective_source = resolve_source_fps(source_fps, "g1-csv", input_path, None) motion = load_g1_csv(input_path, source_fps=effective_source, mujoco_rest_zero=mujoco_rest_zero) save_kimodo_npz_at_target_fps(motion, sk, effective_source, output_path) return if pair == ("kimodo", "g1-csv"): data, J = load_kimodo_npz_as_torch(input_path, ensure_complete=False) if J != 34: raise ValueError(f"Kimodo→CSV requires G1 with 34 joints; this file has J={J}.") sk = build_skeleton(34) effective_source = resolve_source_fps(source_fps, "kimodo", input_path, None) converter = MujocoQposConverter(sk) qpos = converter.dict_to_qpos( {k: v for k, v in data.items() if k in ("local_rot_mats", "root_positions")}, device=str(sk.neutral_joints.device), numpy=True, mujoco_rest_zero=mujoco_rest_zero, ) converter.save_csv(qpos, output_path) return raise ValueError( f"Unsupported conversion {from_fmt!r} → {to_fmt!r}. " "Supported: amass↔kimodo (SMPL-X NPZ), soma-bvh↔kimodo, g1-csv↔kimodo." 
) def _validate_output_extension(to_fmt: str, output_path: str) -> None: lower = output_path.lower() if to_fmt == "kimodo" and lower.endswith(".npz"): return if to_fmt == "amass": if not lower.endswith(".npz"): raise ValueError("AMASS output must use a .npz path.") elif to_fmt == "soma-bvh": if not lower.endswith(".bvh"): raise ValueError("SOMA BVH output must use a .bvh path.") elif to_fmt == "g1-csv": if not lower.endswith(".csv"): raise ValueError("G1 CSV output must use a .csv path.") ================================================ FILE: kimodo/exports/motion_formats.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Infer motion file formats from paths and NPZ contents.""" from __future__ import annotations import os from typing import Literal import numpy as np MotionSourceFormat = Literal["amass", "kimodo", "soma-bvh", "g1-csv"] MotionTargetFormat = Literal["amass", "kimodo", "soma-bvh", "g1-csv"] NpzMotionKind = Literal["amass", "kimodo"] def infer_npz_kind(path: str) -> NpzMotionKind: """Classify a ``.npz`` as AMASS SMPL-X or Kimodo from required array keys.""" with np.load(path, allow_pickle=False) as z: keys = set(z.files) if "trans" in keys and "pose_body" in keys and "root_orient" in keys: return "amass" if "local_rot_mats" in keys or "posed_joints" in keys: return "kimodo" raise ValueError( f"Unrecognized NPZ {path!r}: expected AMASS keys (trans, pose_body, ...) " "or Kimodo keys (local_rot_mats, posed_joints, ...)." ) def infer_source_format_from_path(path: str) -> MotionSourceFormat: """Infer converter input format from file extension and NPZ contents when needed.""" ext = os.path.splitext(path)[1].lower() if ext == ".bvh": return "soma-bvh" if ext == ".csv": return "g1-csv" if ext == ".npz": return infer_npz_kind(path) # type: ignore[return-value] raise ValueError(f"Cannot infer format from extension of {path!r}") def infer_target_format_from_path(path: str, from_fmt: MotionSourceFormat) -> MotionTargetFormat: """Infer converter output format from destination path and source format.""" ext = os.path.splitext(path)[1].lower() if ext == ".bvh": return "soma-bvh" if ext == ".csv": return "g1-csv" if ext == ".npz": if from_fmt == "amass": return "kimodo" if from_fmt == "kimodo": return "amass" if from_fmt in ("g1-csv", "soma-bvh"): return "kimodo" raise ValueError( "Ambiguous .npz output: set --to to 'kimodo' or 'amass' when the input format is not amass/kimodo." ) raise ValueError(f"Cannot infer output format from extension of {path!r}") def resolve_source_fps( fps: float | None, from_kind: str, input_path: str, data: dict | None, ) -> float: """Resolve source frame rate (Hz) for conversion when ``fps`` is not overridden.""" if fps is not None: return float(fps) if data is not None and "mocap_frame_rate" in data: return float(np.asarray(data["mocap_frame_rate"]).item()) if from_kind == "soma-bvh": from kimodo.exports.bvh import read_bvh_frame_time_seconds return 1.0 / read_bvh_frame_time_seconds(input_path) return 30.0 ================================================ FILE: kimodo/exports/motion_io.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Assemble Kimodo NPZ-compatible motion dicts from local rotations + root trajectory.""" from __future__ import annotations import os import warnings from typing import Any, Dict, Tuple import numpy as np import torch from kimodo.geometry import matrix_to_quaternion, quaternion_to_matrix from kimodo.motion_rep.feature_utils import compute_heading_angle, compute_vel_xyz from kimodo.motion_rep.feet import foot_detect_from_pos_and_vel from kimodo.motion_rep.smooth_root import get_smooth_root_pos from kimodo.skeleton import SkeletonBase from kimodo.skeleton.registry import build_skeleton from kimodo.tools import to_numpy # Default motion rate for Kimodo NPZ produced by format conversion (matches common model FPS). KIMODO_CONVERT_TARGET_FPS = 30.0 def _quaternion_slerp(q0: torch.Tensor, q1: torch.Tensor, t: torch.Tensor) -> torch.Tensor: """Spherical linear interpolation; ``q0``, ``q1`` (..., 4) wxyz; ``t`` broadcastable to (..., 1).""" if t.dim() < q0.dim(): t = t.unsqueeze(-1) dot = (q0 * q1).sum(dim=-1, keepdim=True) q1 = torch.where(dot < 0, -q1, q1) dot = torch.abs(dot).clamp(-1.0, 1.0) theta_0 = torch.acos(dot) sin_theta = torch.sin(theta_0) s0 = torch.sin((1.0 - t) * theta_0) / sin_theta.clamp(min=1e-8) s1 = torch.sin(t * theta_0) / sin_theta.clamp(min=1e-8) q = s0 * q0 + s1 * q1 return q / torch.linalg.norm(q, dim=-1, keepdim=True).clamp(min=1e-8) def resample_motion_dict_to_kimodo_fps( motion_dict: Dict[str, torch.Tensor], skeleton: SkeletonBase, source_fps: float, target_fps: float = KIMODO_CONVERT_TARGET_FPS, ) -> Tuple[Dict[str, torch.Tensor], bool]: """Resample a Kimodo motion dict to ``target_fps``. When the fps ratio is close to an integer (e.g. 120 / 30 = 4), the faster stepping method is used (take every *step*-th frame). Otherwise falls back to linear interp (root) + quaternion slerp (joints). Re-runs :func:`complete_motion_dict` at the target rate so derived channels stay consistent. Returns: The motion dict and ``True`` if time resampling was applied, else ``False`` (already at ``target_fps`` with matching frame count; only re-derived via FK). 
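Example (illustrative): 120 Hz -> 30 Hz takes every 4th frame (``step == 4``),
while 100 Hz -> 30 Hz (ratio 10/3, not near an integer) falls back to linear
root interpolation plus per-joint quaternion slerp.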
""" local_rot_mats = motion_dict["local_rot_mats"] root_positions = motion_dict["root_positions"] local_rot_mats, root_positions = _coerce_time_local_root(local_rot_mats, root_positions) t_in = int(local_rot_mats.shape[0]) if t_in < 1: raise ValueError("Motion must have at least one frame.") if source_fps <= 0: raise ValueError(f"source_fps must be positive; got {source_fps}") t_out = max(1, int(round(t_in * target_fps / source_fps))) if t_out == t_in and abs(float(source_fps) - float(target_fps)) < 1e-3: return complete_motion_dict(local_rot_mats, root_positions, skeleton, float(target_fps)), False ratio = source_fps / target_fps step = round(ratio) if step >= 2 and abs(ratio - step) < 0.05: local_out = local_rot_mats[::step] root_out = root_positions[::step] else: device = local_rot_mats.device dtype = local_rot_mats.dtype u = torch.linspace(0, t_in - 1, t_out, device=device, dtype=dtype) i0 = u.floor().long().clamp(0, t_in - 1) i1 = torch.minimum(i0 + 1, torch.tensor(t_in - 1, device=device)) tau_1d = (u - i0.float()).unsqueeze(-1) rp0 = root_positions[i0] rp1 = root_positions[i1] root_out = (1.0 - tau_1d) * rp0 + tau_1d * rp1 quats = matrix_to_quaternion(local_rot_mats) q0 = quats[i0] q1 = quats[i1] tau_q = (u - i0.float()).view(t_out, 1, 1) quat_out = _quaternion_slerp(q0, q1, tau_q) local_out = quaternion_to_matrix(quat_out) return complete_motion_dict(local_out, root_out, skeleton, float(target_fps)), True def warn_kimodo_npz_framerate(source_fps: float, t_before: int, t_after: int) -> None: """Emit a warning after time resampling for Kimodo NPZ (linear root, quaternion slerp per joint).""" warnings.warn( f"Resampled motion to {KIMODO_CONVERT_TARGET_FPS:.0f} Hz for Kimodo NPZ " f"(source ~{source_fps:.4g} Hz, {t_before} input frames → {t_after} output frames). " "Pass --source-fps if the detected source rate is wrong.", UserWarning, stacklevel=3, ) def _coerce_time_local_root( local_rot_mats: torch.Tensor, root_positions: torch.Tensor, ) -> tuple[torch.Tensor, torch.Tensor]: """Normalize to shapes (T, J, 3, 3) and (T, 3).""" if local_rot_mats.dim() == 5: if int(local_rot_mats.shape[0]) != 1: raise ValueError(f"local_rot_mats batch size must be 1 for single clip; got {local_rot_mats.shape[0]}") local_rot_mats = local_rot_mats[0] if root_positions.dim() == 3: if int(root_positions.shape[0]) != 1: raise ValueError(f"root_positions batch size must be 1; got {root_positions.shape[0]}") root_positions = root_positions[0] if local_rot_mats.dim() != 4: raise ValueError(f"local_rot_mats must be (T,J,3,3); got {tuple(local_rot_mats.shape)}") if root_positions.dim() != 2 or int(root_positions.shape[-1]) != 3: raise ValueError(f"root_positions must be (T,3); got {tuple(root_positions.shape)}") if int(local_rot_mats.shape[0]) != int(root_positions.shape[0]): raise ValueError("local_rot_mats and root_positions must have the same number of frames") return local_rot_mats, root_positions def complete_motion_dict( local_rot_mats: torch.Tensor, root_positions: torch.Tensor, skeleton: SkeletonBase, fps: float, ) -> Dict[str, torch.Tensor]: """Build the Kimodo motion output dict from local rotations and root positions. Matches keys written by CLI generation (see docs/source/user_guide/output_formats.md). Args: local_rot_mats: (T, J, 3, 3) or (1, T, J, 3, 3) local rotation matrices. root_positions: (T, 3) or (1, T, 3) root / pelvis world positions (meters). skeleton: Skeleton instance (SOMA77, G1, SMPL-X, etc.). fps: Sampling rate (Hz). 
Returns: Dict with tensors ``posed_joints``, ``global_rot_mats``, ``local_rot_mats``, ``foot_contacts``, ``smooth_root_pos``, ``root_positions``, ``global_root_heading``. """ device = local_rot_mats.device dtype = local_rot_mats.dtype local_rot_mats, root_positions = _coerce_time_local_root( local_rot_mats.to(device=device, dtype=dtype), root_positions.to(device=device, dtype=dtype), ) global_rot_mats, posed_joints, _ = skeleton.fk(local_rot_mats, root_positions) smooth_root_pos = get_smooth_root_pos(root_positions.unsqueeze(0)).squeeze(0) lengths = torch.tensor([posed_joints.shape[0]], device=device) velocities = compute_vel_xyz(posed_joints.unsqueeze(0), fps, lengths=lengths).squeeze(0) heading_angle = compute_heading_angle(posed_joints.unsqueeze(0), skeleton).squeeze(0) global_root_heading = torch.stack([torch.cos(heading_angle), torch.sin(heading_angle)], dim=-1) foot_contacts = foot_detect_from_pos_and_vel( posed_joints.unsqueeze(0), velocities.unsqueeze(0), skeleton, 0.15, 0.10, ).squeeze(0) return { "posed_joints": posed_joints, "global_rot_mats": global_rot_mats, "local_rot_mats": local_rot_mats, "foot_contacts": foot_contacts, "smooth_root_pos": smooth_root_pos, "root_positions": root_positions, "global_root_heading": global_root_heading, } def motion_dict_to_numpy(d: Dict[str, Any]) -> Dict[str, np.ndarray]: """Convert motion dict values to numpy arrays for ``np.savez``.""" out: Dict[str, np.ndarray] = {} for k, v in d.items(): if hasattr(v, "detach"): out[k] = to_numpy(v) elif isinstance(v, np.ndarray): out[k] = v else: out[k] = np.asarray(v) return out def save_kimodo_npz(path: str, motion_dict: Dict[str, Any]) -> None: """Save a Kimodo-compatible motion dict to ``.npz`` (numpy arrays).""" np.savez(path, **motion_dict_to_numpy(motion_dict)) def load_kimodo_npz(path: str) -> Dict[str, np.ndarray]: """Load arrays from a Kimodo ``.npz`` file.""" with np.load(path, allow_pickle=False) as data: return {k: np.asarray(data[k]) for k in data.files} def load_g1_csv( path: str, source_fps: float = KIMODO_CONVERT_TARGET_FPS, *, mujoco_rest_zero: bool = False, ) -> Dict[str, torch.Tensor]: """Load a G1 MuJoCo ``qpos`` CSV (``(T, 36)``) into a Kimodo motion dict. Args: path: CSV path (comma-separated, no header). source_fps: Source frame rate (Hz) of the CSV data. mujoco_rest_zero: Must match how the CSV was written (see :class:`MujocoQposConverter`). """ from kimodo.exports.mujoco import MujocoQposConverter qpos = np.loadtxt(path, delimiter=",") if qpos.ndim != 2 or qpos.shape[-1] != 36: raise ValueError(f"Expected G1 CSV with shape (T, 36); got {qpos.shape}") sk = build_skeleton(34) converter = MujocoQposConverter(sk) return converter.qpos_to_motion_dict(qpos, float(source_fps), mujoco_rest_zero=mujoco_rest_zero) def load_amass_npz( path: str, source_fps: float | None = None, *, z_up: bool = True, ) -> Dict[str, torch.Tensor]: """Load an AMASS-style SMPL-X ``.npz`` into a Kimodo motion dict (22 joints). Args: path: NPZ with ``trans``, ``root_orient``, ``pose_body``, etc. source_fps: Source frame rate (Hz); if ``None``, uses ``mocap_frame_rate`` from the file when present, else 30 Hz. z_up: If ``True``, apply AMASS Z-up to Kimodo Y-up transform (same as CLI). 
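Example (illustrative; the path is a placeholder)::

    motion = load_amass_npz("clip_amass.npz")  # 22-joint SMPL-X skeleton
    print(motion["root_positions"].shape)  # (T, 3), in kimodo y-up coordinates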
""" from kimodo.exports.smplx import amass_npz_to_kimodo_motion sk = build_skeleton(22) return amass_npz_to_kimodo_motion(path, sk, source_fps=source_fps, z_up=z_up) def load_kimodo_npz_as_torch( path: str, source_fps: float = KIMODO_CONVERT_TARGET_FPS, *, ensure_complete: bool = True, ) -> tuple[Dict[str, torch.Tensor], int]: """Load a Kimodo NPZ and return all arrays as torch tensors on the skeleton device. Args: path: Kimodo NPZ file path. source_fps: Source frame rate (Hz) used for derived channels when ``ensure_complete=True``. ensure_complete: If ``True`` and the NPZ lacks derived channels (``posed_joints``, ``global_rot_mats``, …), run :func:`complete_motion_dict` to fill them from ``local_rot_mats`` + ``root_positions``. If ``False``, load all arrays verbatim (requires ``local_rot_mats``). Returns: ``(tensor_dict, num_joints)`` """ raw = load_kimodo_npz(path) if "local_rot_mats" in raw: j = int(raw["local_rot_mats"].shape[1]) elif "posed_joints" in raw: j = int(raw["posed_joints"].shape[1]) else: raise ValueError("Kimodo NPZ must contain 'local_rot_mats' or 'posed_joints'.") sk = build_skeleton(j) device = sk.neutral_joints.device dtype = torch.float32 if not ensure_complete: if "local_rot_mats" not in raw: raise ValueError("Kimodo NPZ must contain 'local_rot_mats' (and typically 'root_positions').") out: Dict[str, torch.Tensor] = {} for k, v in raw.items(): out[k] = torch.from_numpy(np.asarray(v)).to(device=device, dtype=dtype) return out, j if "posed_joints" in raw and "global_rot_mats" in raw: out = {} for k, v in raw.items(): out[k] = torch.from_numpy(np.asarray(v)).to(device=device, dtype=dtype) return out, j if "local_rot_mats" not in raw or "root_positions" not in raw: raise ValueError("Kimodo NPZ must contain posed_joints+global_rot_mats, or local_rot_mats+root_positions.") local = torch.from_numpy(np.asarray(raw["local_rot_mats"])).to(device=device, dtype=dtype) root = torch.from_numpy(np.asarray(raw["root_positions"])).to(device=device, dtype=dtype) return complete_motion_dict(local, root, sk, float(source_fps)), j def save_kimodo_npz_at_target_fps( motion: Dict[str, torch.Tensor], skeleton: SkeletonBase, source_fps: float, output_path: str, target_fps: float = KIMODO_CONVERT_TARGET_FPS, ) -> None: """Resample a motion dict to ``target_fps`` when needed, then save Kimodo NPZ.""" t_before = int(motion["local_rot_mats"].shape[0]) motion, did_resample = resample_motion_dict_to_kimodo_fps(motion, skeleton, source_fps, target_fps) t_after = int(motion["local_rot_mats"].shape[0]) if did_resample: warn_kimodo_npz_framerate(source_fps, t_before, t_after) save_kimodo_npz(output_path, motion) def kimodo_npz_to_bytes(motion_dict: Dict[str, Any]) -> bytes: """Serialize a Kimodo motion dict to in-memory NPZ bytes.""" import io buf = io.BytesIO() np.savez(buf, **motion_dict_to_numpy(motion_dict)) return buf.getvalue() def g1_csv_to_bytes(motion_dict: Dict[str, Any], skeleton: SkeletonBase, device: Any) -> bytes: """Convert a motion dict to G1 MuJoCo CSV bytes via :class:`MujocoQposConverter`.""" import io from kimodo.exports.mujoco import MujocoQposConverter converter = MujocoQposConverter(skeleton) qpos = converter.dict_to_qpos( {k: v for k, v in motion_dict.items() if k in ("local_rot_mats", "root_positions")}, device, numpy=True, ) buf = io.StringIO() np.savetxt(buf, qpos, delimiter=",") return buf.getvalue().encode("utf-8") def amass_npz_to_bytes(motion_dict: Dict[str, Any], skeleton: SkeletonBase, fps: float) -> bytes: """Convert a motion dict to AMASS NPZ bytes via 
:class:`AMASSConverter`.""" import io from kimodo.exports.smplx import AMASSConverter converter = AMASSConverter(skeleton=skeleton, fps=fps) buf = io.BytesIO() converter.convert_save_npz( {k: v for k, v in motion_dict.items() if k in ("local_rot_mats", "root_positions")}, buf, ) return buf.getvalue() def _read_amass_source_fps(path: str) -> float: """Read the source frame rate from an AMASS NPZ, defaulting to 30 Hz.""" with np.load(path, allow_pickle=True) as z: if "mocap_frame_rate" in z.files: return float(z["mocap_frame_rate"]) return 30.0 def load_motion_file( path: str, source_fps: float | None = None, target_fps: float | None = None, *, z_up: bool = True, mujoco_rest_zero: bool = False, ) -> tuple[Dict[str, torch.Tensor], int]: """Load a motion file and return a Kimodo motion dict plus joint count. Supports SOMA BVH (``.bvh``), G1 MuJoCo CSV (``.csv``), Kimodo NPZ, and AMASS SMPL-X NPZ (``.npz``). The motion is loaded at its native (or overridden) source rate, then resampled to ``target_fps`` when they differ. Args: path: Path to ``.bvh``, ``.csv``, or ``.npz``. source_fps: Source frame rate (Hz). If provided, trusted as-is. If ``None``, auto-detected per format: BVH ``Frame Time`` header, AMASS ``mocap_frame_rate``, or :data:`KIMODO_CONVERT_TARGET_FPS` (30 Hz) for CSV / Kimodo NPZ. target_fps: Desired output frame rate (Hz). Defaults to :data:`KIMODO_CONVERT_TARGET_FPS` (30 Hz). The motion is resampled when ``source_fps`` and ``target_fps`` differ. z_up: AMASS NPZ only; passed to :func:`load_amass_npz`. mujoco_rest_zero: G1 CSV only; passed to :func:`load_g1_csv`. Returns: ``(motion_dict, num_joints)`` with the same keys as :func:`complete_motion_dict`. """ from kimodo.exports.motion_formats import infer_npz_kind if target_fps is None: target_fps = KIMODO_CONVERT_TARGET_FPS ext = os.path.splitext(path)[1].lower() if ext == ".bvh": from kimodo.exports.bvh import bvh_to_kimodo_motion motion_dict, bvh_fps = bvh_to_kimodo_motion(path) effective_source = source_fps if source_fps is not None else bvh_fps num_joints = int(motion_dict["local_rot_mats"].shape[1]) elif ext == ".csv": effective_source = source_fps if source_fps is not None else KIMODO_CONVERT_TARGET_FPS motion_dict = load_g1_csv(path, source_fps=effective_source, mujoco_rest_zero=mujoco_rest_zero) num_joints = 34 elif ext == ".npz": kind = infer_npz_kind(path) if kind == "amass": effective_source = source_fps if source_fps is not None else _read_amass_source_fps(path) motion_dict = load_amass_npz(path, source_fps=effective_source, z_up=z_up) num_joints = 22 else: effective_source = source_fps if source_fps is not None else KIMODO_CONVERT_TARGET_FPS motion_dict, num_joints = load_kimodo_npz_as_torch(path, source_fps=effective_source) else: raise ValueError(f"Unsupported motion file {path!r}; expected .bvh, .csv, or .npz") if abs(effective_source - target_fps) > 0.5: sk = build_skeleton(num_joints) motion_dict, did_resample = resample_motion_dict_to_kimodo_fps(motion_dict, sk, effective_source, target_fps) if did_resample: t_out = int(motion_dict["local_rot_mats"].shape[0]) warnings.warn( f"Resampled motion from {effective_source:.4g} Hz to " f"{target_fps:.0f} Hz ({t_out} frames).", UserWarning, stacklevel=2, ) return motion_dict, num_joints ================================================ FILE: kimodo/exports/mujoco.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Convert kimodo motion (y-up, z-forward) to MuJoCo qpos (z-up, x-forward) for G1 skeleton.""" import os import xml.etree.ElementTree as ET from typing import Optional import numpy as np import torch from scipy.spatial.transform import Rotation from kimodo.assets import skeleton_asset_path from kimodo.geometry import ( axis_angle_to_matrix, matrix_to_axis_angle, matrix_to_quaternion, quaternion_to_matrix, ) from kimodo.skeleton import G1Skeleton34, SkeletonBase, global_rots_to_local_rots from kimodo.tools import ensure_batched, to_numpy, to_torch # Cache so that the same (skeleton, xml_path) returns the same converter instance. _converter_cache: dict[tuple[int, str], "MujocoQposConverter"] = {} class MujocoQposConverter: """Fast batch converter from our dictionary format to mujoco qpos with precomputed transforms. In MuJoCo, the coordinate system is z-up and x-forward, right-handed. Joints (30): free root (pelvis, 7 qpos values = 3 translation + 4 quaternion) + 29 hinge joints (29 qpos values). In kimodo, the coordinate system is y-up and z-forward, right-handed. Joints (34): root (pelvis) + 33 body joints; 4 of these are end-effector joints added by kimodo. Cached by (input_skeleton id, xml_path); repeated calls with the same args return the same instance. """ def __new__( cls, input_skeleton: SkeletonBase, xml_path: str = str(skeleton_asset_path("g1skel34", "xml", "g1.xml")), ): key = (id(input_skeleton), xml_path) if key not in _converter_cache: inst = object.__new__(cls) _converter_cache[key] = inst return _converter_cache[key] def __init__( self, input_skeleton: SkeletonBase, xml_path: str = str(skeleton_asset_path("g1skel34", "xml", "g1.xml")), ): """Initialize converter with precomputed transforms. Args: input_skeleton: Kimodo skeleton (G1, 34 joints) providing joint names and ordering. xml_path: Path to the mujoco XML file containing joint definitions. """ if getattr(self, "_initialized", False): return self.xml_path = xml_path self.skeleton = input_skeleton self._prepare_transforms() self._subtree_joints = {} self._initialized = True def _prepare_transforms(self): """Precompute all necessary transforms for efficient batch processing.""" # Define coordinate transformations between mujoco and kimodo space # 1) R_zup_to_yup: rotation around x-axis by -90 degrees # 2) x_forward_to_y_forward: rotation around z-axis by -90 degrees # Combined transformation matrix: mujoco_to_kimodo = R_zup_to_yup * x_forward_to_y_forward self.mujoco_to_kimodo_matrix = torch.tensor( [[0.0, 1.0, 0.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0]], dtype=torch.float32 ) self.kimodo_to_mujoco_matrix = self.mujoco_to_kimodo_matrix.T # Inverse transformation: kimodo_to_mujoco # Parse XML once and extract joint information tree = ET.parse(self.xml_path) root = tree.getroot() xml_classes = [x for x in tree.findall(".//default") if "class" in x.attrib] joint_axes = dict() class_ranges: dict[str, tuple[float, float]] = {} for xml_class in xml_classes: j = xml_class.findall("joint") if j: joint_axes[xml_class.get("class")] = j[0].get("axis") range_str = j[0].get("range") if range_str: range_vals = [float(x) for x in range_str.split()] if len(range_vals) == 2: class_ranges[xml_class.get("class")] = ( range_vals[0], range_vals[1], ) mujoco_hinge_joints = root.find("worldbody").findall(".//joint") # skip the base joint self._mujoco_joint_axis_values_kimodo_space = torch.zeros( (len(mujoco_hinge_joints), 3), dtype=torch.float32 ) # mujoco order but kimodo space self._mujoco_joint_axis_values_mujoco_space = torch.zeros( (len(mujoco_hinge_joints), 3), dtype=torch.float32 ) # mujoco
order but mujoco space # for the below indices, mujoco_indices_to_kimodo_indices does not include mujoco root (30 - 1 = 29 elements), # while kimodo_indices_to_mujoco_indices includes the kimodo root (nbjoints = 34 elements). self._mujoco_indices_to_kimodo_indices = torch.zeros((len(mujoco_hinge_joints),), dtype=torch.int32) self._kimodo_indices_to_mujoco_indices = ( torch.ones((self.skeleton.nbjoints,), dtype=torch.int32) * -1 ) # -1 means not in the csv skeleton self._nb_joints_mujoco = len(mujoco_hinge_joints) + 1 self._nb_joints_kimodo = self.skeleton.nbjoints self._mujoco_joint_including_root_parent_list = torch.full( (len(mujoco_hinge_joints) + 1,), -1, dtype=torch.int32 ) self._mujoco_joint_including_root_list = ["pelvis_skel"] for joint_id_in_csv, joint in enumerate(mujoco_hinge_joints): joint_name_in_skeleton = joint.get("name").replace("_joint", "_skel") joint_parent_name_in_skeleton = self.skeleton.bone_parents[joint_name_in_skeleton] self._mujoco_joint_including_root_list.append(joint_name_in_skeleton) self._mujoco_joint_including_root_parent_list[joint_id_in_csv + 1] = ( self._mujoco_joint_including_root_list.index(joint_parent_name_in_skeleton) ) joint_idx_in_kimodo_skeleton = self.skeleton.bone_order_names.index(joint_name_in_skeleton) axis_values = [float(x) for x in (joint.get("axis") or joint_axes[joint.get("class")]).split(" ")] # the mapped axis in kimodo skeleton space is calculated as bones_axis = mujoco_to_kimodo.apply(axis_values) # [1, 0, 0] -> [0, 0, 1]; [0, 1, 0] -> [1, 0, 0]; [0, 0, 1] -> [0, 1, 0] mujoco_joint_axis_mapping_kimodo_space = [ torch.tensor([0, 0, 1]), torch.tensor([1, 0, 0]), torch.tensor([0, 1, 0]), ][np.argmax(axis_values)] self._mujoco_joint_axis_values_kimodo_space[joint_id_in_csv] = mujoco_joint_axis_mapping_kimodo_space self._mujoco_joint_axis_values_mujoco_space[joint_id_in_csv] = torch.tensor(axis_values) self._mujoco_indices_to_kimodo_indices[joint_id_in_csv] = joint_idx_in_kimodo_skeleton self._kimodo_indices_to_mujoco_indices[joint_idx_in_kimodo_skeleton] = ( joint_id_in_csv + 1 ) # +1 for the root self._kimodo_indices_to_mujoco_indices[0] = 0 # the root joint mapping # Joint limits (min, max) in radians for each mujoco hinge, for clamping self._joint_limits_min = torch.full((len(mujoco_hinge_joints),), float("-inf"), dtype=torch.float32) self._joint_limits_max = torch.full((len(mujoco_hinge_joints),), float("inf"), dtype=torch.float32) for joint_id_in_csv, joint in enumerate(mujoco_hinge_joints): range_vals = None if joint.get("range"): range_vals = [float(x) for x in joint.get("range").split()] elif joint.get("class") and joint.get("class") in class_ranges: lo, hi = class_ranges[joint.get("class")] range_vals = [lo, hi] if range_vals is not None and len(range_vals) == 2: self._joint_limits_min[joint_id_in_csv] = range_vals[0] self._joint_limits_max[joint_id_in_csv] = range_vals[1] # load the offset matrices from the xml R_zup_to_yup = Rotation.from_euler("x", -90, degrees=True) x_forward_to_y_forward = Rotation.from_euler("z", -90, degrees=True) mujoco_to_kimodo = R_zup_to_yup * x_forward_to_y_forward self._rot_offsets_q2t = torch.zeros(len(self._kimodo_indices_to_mujoco_indices), 3, 3, dtype=torch.float32) self._rot_offsets_q2t[...] = torch.eye(3)[None] self._rot_offsets_f2q = torch.zeros(len(self._kimodo_indices_to_mujoco_indices), 3, 3, dtype=torch.float32) self._rot_offsets_f2q[...]
= torch.eye(3)[None] parent_map = {child: parent for parent in root.iter() for child in parent} for i, joint in enumerate(mujoco_hinge_joints): body = parent_map[joint] if "quat" in body.attrib: rot = Rotation.from_quat( [float(x) for x in body.get("quat").strip().split(" ")], scalar_first=True, ) idx = self._mujoco_indices_to_kimodo_indices[i] self._rot_offsets_q2t[idx] = torch.from_numpy(rot.as_matrix()) rot = mujoco_to_kimodo * rot * mujoco_to_kimodo.inv() self._rot_offsets_f2q[idx] = torch.from_numpy(rot.as_matrix().T) # Hinge axis in f2q space so extraction uses the same frame as joint_rot_f2q. # Then extract(offset) gives the angle s.t. axis_angle(angle * axis_f2q) = offset, and # reconstruction R_local = offset.T @ axis_angle(angle * axis_f2q) = I when input is identity. axis_kimodo = self._mujoco_joint_axis_values_kimodo_space self._mujoco_joint_axis_values_f2q_space = torch.zeros_like(axis_kimodo) for i in range(len(mujoco_hinge_joints)): j = self._mujoco_indices_to_kimodo_indices[i].item() axis_f2q = torch.mv(self._rot_offsets_f2q[j], axis_kimodo[i]) n = axis_f2q.norm() if n > 1e-8: axis_f2q = axis_f2q / n self._mujoco_joint_axis_values_f2q_space[i] = axis_f2q # Rest-pose DOFs: angle we extract when R_local = I (t-pose). MuJoCo limits are # relative to joint zero (rest pose), so we must clamp in MuJoCo space: convert # joint_dofs to mujoco_angle = joint_dofs - rest_dofs, clamp, then back. rest_rot_f2q = self._rot_offsets_f2q[self._mujoco_indices_to_kimodo_indices] rest_rot_f2q = rest_rot_f2q.unsqueeze(0).unsqueeze(0) self._rest_dofs = self._local_rots_f2q_to_joint_dofs(rest_rot_f2q).squeeze(0).squeeze(0) # Axis-angle rest DOFs: angle s.t. axis_angle(angle * axis_f2q) = offset. Used in # project_to_real_robot_rotations so extract+reconstruct round-trip and t-pose is preserved. rest_rot_f2q_flat = self._rot_offsets_f2q[self._mujoco_indices_to_kimodo_indices] full_aa = matrix_to_axis_angle(rest_rot_f2q_flat) self._rest_dofs_axis_angle = (full_aa * self._mujoco_joint_axis_values_f2q_space).sum(dim=-1) def dict_to_qpos( self, output: dict, device: Optional[str] = None, root_quat_w_first: bool = True, numpy: bool = True, mujoco_rest_zero: bool = False, ): """Convert kimodo output dict to mujoco qpos format. Args: output: dict with keys "local_rot_mats" and "root_positions". device: device to use for the output. root_quat_w_first: If True, quaternion in qpos is (w,x,y,z). numpy: If True, convert the output to numpy array. mujoco_rest_zero: If True, joint angles are written so that kimodo rest (t-pose) maps to q=0 in MuJoCo. If False, write raw joint_dofs. Returns: qpos: (B, T, 7+J) mujoco qpos format. """ local_rot_mats = to_torch(output["local_rot_mats"], device) root_positions = to_torch(output["root_positions"], device) qpos = self.to_qpos( local_rot_mats, root_positions, root_quat_w_first=root_quat_w_first, mujoco_rest_zero=mujoco_rest_zero, ) if numpy: qpos = to_numpy(qpos) return qpos def qpos_to_motion_dict( self, qpos: torch.Tensor | np.ndarray, source_fps: float, *, root_quat_w_first: bool = True, mujoco_rest_zero: bool = False, ): """Inverse of :meth:`to_qpos` / :meth:`dict_to_qpos` for MuJoCo CSV ``(T, 36)`` rows. Args: qpos: Shape ``(T, 36)`` or ``(1, T, 36)`` (root xyz, root quat wxyz, 29 joint angles). source_fps: Source frame rate (Hz) of the qpos data. root_quat_w_first: Must match how the CSV was written (default ``True``). mujoco_rest_zero: Must match :meth:`dict_to_qpos` / :meth:`to_qpos`. 
Returns: Kimodo motion dict (see :func:`kimodo.exports.motion_io.complete_motion_dict`). """ from kimodo.exports.motion_io import complete_motion_dict qpos = to_torch(qpos, None) if qpos.dim() == 2: qpos = qpos.unsqueeze(0) device = qpos.device dtype = qpos.dtype batch_size, num_frames, ncols = qpos.shape if ncols != 36: raise ValueError(f"Expected qpos last dim 36; got {ncols}") kimodo_to_mujoco_matrix = self.kimodo_to_mujoco_matrix.to(device=device, dtype=dtype) mujoco_to_kimodo_matrix = kimodo_to_mujoco_matrix.T root_mujoco = qpos[..., :3] root_positions = torch.matmul(mujoco_to_kimodo_matrix[None, None, ...], root_mujoco[..., None]).squeeze(-1) quat = qpos[..., 3:7] if root_quat_w_first: root_rot_mujoco = quaternion_to_matrix(quat) else: quat_wxyz = quat[..., [3, 0, 1, 2]] root_rot_mujoco = quaternion_to_matrix(quat_wxyz) O0 = self._rot_offsets_f2q[0].to(device=device, dtype=dtype) # root_rot_mujoco is (..., 3, 3) after optional batch unsqueeze (e.g. (1, T, 3, 3)). # Use ``...il`` so ``k`` sums with ``kl``; ``...ik`` incorrectly keeps ``k`` in the output. R_f2q_root = torch.einsum( "ij,...jk,kl->...il", mujoco_to_kimodo_matrix, root_rot_mujoco, kimodo_to_mujoco_matrix, ) R_kimodo_root = torch.einsum("ij,...jk->...ik", O0.T, R_f2q_root) joint_dofs = qpos[..., 7:] if mujoco_rest_zero: rest_dofs = self._rest_dofs.to(device=device, dtype=dtype) angles = joint_dofs + rest_dofs[None, None, :] use_relative = True else: angles = joint_dofs use_relative = False nb_joints = self.skeleton.nbjoints template = torch.eye(3, device=device, dtype=dtype).expand(batch_size, num_frames, nb_joints, 3, 3).contiguous() template[:, :, 0] = R_kimodo_root local_rot_mats = self._joint_dofs_to_local_rot_mats( angles, template, device, dtype, use_relative=use_relative, ) if batch_size != 1: raise ValueError(f"Only a single clip is supported; got batch_size={batch_size}") return complete_motion_dict(local_rot_mats[0], root_positions[0], self.skeleton, source_fps) def save_csv(self, qpos: torch.Tensor | np.ndarray, csv_path): """Save qpos rows to CSV; a (T, 36) array writes a single file, a (B, T, 36) batch writes one file per clip with a _NN suffix.""" qpos = to_numpy(qpos) shape = qpos.shape if len(shape) == 2: # only one motion: save it np.savetxt(csv_path, qpos, delimiter=",") if len(shape) == 3: # batch of motions if shape[0] == 1: # if only one motion, just save it np.savetxt(csv_path, qpos[0], delimiter=",") else: csv_path_base, ext = os.path.splitext(csv_path) for i in range(shape[0]): self.save_csv(qpos[i], csv_path_base + "_" + str(i).zfill(2) + ext) def _local_rots_to_joint_dofs( self, local_rot_mats: torch.Tensor, axis_vals: torch.Tensor, ) -> torch.Tensor: """Extract per-joint single-DoF angles (radians) via Euler projection (for to_qpos/f2q).""" x_joint_dof = torch.atan2(local_rot_mats[..., 2, 1], local_rot_mats[..., 2, 2]) y_joint_dof = torch.atan2(local_rot_mats[..., 0, 2], local_rot_mats[..., 0, 0]) z_joint_dof = torch.atan2(local_rot_mats[..., 1, 0], local_rot_mats[..., 1, 1]) xyz_joint_dofs = torch.stack([x_joint_dof, y_joint_dof, z_joint_dof], dim=-1) axis_vals = axis_vals.to(device=local_rot_mats.device, dtype=local_rot_mats.dtype) joint_dofs = (xyz_joint_dofs * axis_vals[None, None, :, :]).sum(dim=-1) return joint_dofs def _local_rots_to_joint_dofs_axis_angle( self, local_rot_mats: torch.Tensor, axis_vals: torch.Tensor, ) -> torch.Tensor: """Extract per-joint single-DoF angles (radians) via axis-angle; round-trips with axis_angle_to_matrix. Args: local_rot_mats: (..., num_hinges, 3, 3) in same frame as axis_vals. axis_vals: (num_hinges, 3) unit axis per hinge.
Returns: joint_dofs: (..., num_hinges) signed angle = dot(axis_angle(R), axis). """ axis_vals = axis_vals.to(device=local_rot_mats.device, dtype=local_rot_mats.dtype) full_aa = matrix_to_axis_angle(local_rot_mats) joint_dofs = (full_aa * axis_vals).sum(dim=-1) return joint_dofs def _local_rots_f2q_to_joint_dofs(self, local_rot_mats_f2q: torch.Tensor) -> torch.Tensor: """Extract per-joint single-DoF angles from local rotations in f2q space (for to_qpos).""" axis_vals = self._mujoco_joint_axis_values_f2q_space return self._local_rots_to_joint_dofs(local_rot_mats_f2q, axis_vals) def _clamp_to_limits(self, joint_dofs: torch.Tensor) -> torch.Tensor: """Clamp joint angles to XML limits (radians). Angles are in kimodo convention (0 = rest). """ device = joint_dofs.device lo = self._joint_limits_min.to(device=device, dtype=joint_dofs.dtype) hi = self._joint_limits_max.to(device=device, dtype=joint_dofs.dtype) return torch.clamp(joint_dofs, lo[None, None, :], hi[None, None, :]) def _clamp_joint_dofs(self, joint_dofs: torch.Tensor, rest_dofs: torch.Tensor) -> torch.Tensor: """Clamp joint angles to MuJoCo limits (radians), with rest_dofs conversion.""" device = joint_dofs.device rest_dofs = rest_dofs.to(device=device, dtype=joint_dofs.dtype) mujoco_dofs = joint_dofs - rest_dofs[None, None, :] lo = self._joint_limits_min.to(device=device, dtype=joint_dofs.dtype) hi = self._joint_limits_max.to(device=device, dtype=joint_dofs.dtype) mujoco_dofs = torch.clamp(mujoco_dofs, lo[None, None, :], hi[None, None, :]) return mujoco_dofs + rest_dofs[None, None, :] def _joint_dofs_to_local_rot_mats( self, joint_dofs: torch.Tensor, original_local_rot_mats: torch.Tensor, device: torch.device, dtype: torch.dtype, use_relative: bool = False, ) -> torch.Tensor: """Reconstruct full local rotation matrices from 1-DoF angles.""" out = original_local_rot_mats.clone() axis_kimodo = self._mujoco_joint_axis_values_kimodo_space.to(device=device, dtype=dtype) for i in range(joint_dofs.shape[-1]): j = self._mujoco_indices_to_kimodo_indices[i].item() angle = joint_dofs[..., i] axis = axis_kimodo[i] if use_relative: axis_angle = angle[..., None] * axis[None, None, :] R_local = axis_angle_to_matrix(axis_angle) else: rot_offsets_f2q = self._rot_offsets_f2q.to(device=device, dtype=dtype) axis_in_f2q = torch.mv(rot_offsets_f2q[j], axis) axis_angle = angle[..., None] * axis_in_f2q[None, None, :] R_f2q = axis_angle_to_matrix(axis_angle) R_local = torch.einsum("ij,btjk->btik", rot_offsets_f2q[j].T, R_f2q) out[:, :, j, :, :] = R_local return out @ensure_batched(local_rot_mats=5, root_positions=3, lengths=1) def project_to_real_robot_rotations( self, local_rot_mats: torch.Tensor, root_positions: torch.Tensor, clamp_to_limits: bool = True, mujoco_rest_zero: bool = False, ) -> dict: """Project full 3D local rotations to G1 real robot DoF and back to 3D for viz. Joint angles are extracted along each hinge axis, optionally clamped to XML limits, then reconstructed to 3D rotations. When mujoco_rest_zero=False (default), raw angles are used (baked-with-quat). When True, angles are relative to rest (0 = T-pose in MuJoCo). """ device = local_rot_mats.device dtype = local_rot_mats.dtype # Transform to f2q frame and extract 1-DoF angles (axis-angle projection). 
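# Per hinge i (kimodo joint j), R_f2q = rot_offsets_f2q[j] @ R_local; the signed angle is the component of axis_angle(R_f2q) along the hinge axis, and any off-axis rotation is discarded. This is the lossy 3-DoF -> 1-DoF projection step.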
local_rot_f2q = torch.matmul(self._rot_offsets_f2q.to(device=device, dtype=dtype), local_rot_mats) hinge_rots = local_rot_f2q[:, :, self._mujoco_indices_to_kimodo_indices, :, :] axis_f2q = self._mujoco_joint_axis_values_f2q_space.to(device=device, dtype=dtype) joint_dofs = self._local_rots_to_joint_dofs_axis_angle(hinge_rots, axis_f2q) # Optionally express angles relative to rest (MuJoCo q=0 at T-pose). if mujoco_rest_zero: rest_dofs = self._rest_dofs_axis_angle.to(device=device, dtype=dtype) angles = joint_dofs - rest_dofs[None, None, :] use_relative = True else: angles = joint_dofs use_relative = False if clamp_to_limits: if mujoco_rest_zero: angles = self._clamp_to_limits(angles) else: rest_dofs_aa = self._rest_dofs_axis_angle.to(device=device, dtype=dtype) angles = self._clamp_joint_dofs(angles, rest_dofs_aa) # Reconstruct 3D local rotations from 1-DoF angles and run FK. local_rot_mats_proj = self._joint_dofs_to_local_rot_mats( angles, local_rot_mats, device, dtype, use_relative=use_relative ) global_rot_mats, posed_joints, _ = self.skeleton.fk(local_rot_mats_proj, root_positions) return { "local_rot_mats": local_rot_mats_proj, "global_rot_mats": global_rot_mats, "posed_joints": posed_joints, "root_positions": root_positions, } @ensure_batched(local_rot_mats=5, root_positions=3, lengths=1) def to_qpos( self, local_rot_mats: torch.Tensor, root_positions: torch.Tensor, root_quat_w_first: bool = True, mujoco_rest_zero: bool = False, ) -> torch.Tensor: """Fast batch conversion from kimodo features to mujoco qpos format. Args: local_rot_mats: (B, T, J, 3, 3) local rotation matrices (kimodo convention). root_positions: (B, T, 3) root positions. root_quat_w_first: If True, quaternion in qpos is (w,x,y,z). mujoco_rest_zero: If True, joint angles are written so that kimodo rest (t-pose) maps to q=0 in MuJoCo. If False, write raw joint_dofs. Returns: torch.Tensor of shape [batch, numFrames, 36] containing mujoco qpos data: - root_trans (3) + root_quat (4) + joint_dofs (29) = 36 columns """ batch_size, num_frames, nb_joints = local_rot_mats.shape[:3] device, dtype = local_rot_mats.device, local_rot_mats.dtype local_rot_mats = torch.matmul(self._rot_offsets_f2q.to(device), local_rot_mats) batch_size, num_frames = root_positions.shape[0], root_positions.shape[1] # Move precomputed matrices to the same device/dtype kimodo_to_mujoco_matrix = self.kimodo_to_mujoco_matrix.to(device=device, dtype=dtype) # Initialize output tensor: [batch, numFrames, 36] qpos = torch.zeros((batch_size, num_frames, 36), dtype=dtype, device=device) # Convert root translation: apply coordinate transformation root_positions_mujoco = torch.matmul(kimodo_to_mujoco_matrix[None, None, ...], root_positions[..., None]) qpos[:, :, :3] = root_positions_mujoco.view(batch_size, num_frames, 3) # Convert root rotation: apply coordinate transformation to rotation matrix root_rot = local_rot_mats[:, :, 0, :] # [batch, numFrames, 3, 3] # Apply coordinate transformation: R_mujoco = kimodo_to_mujoco * R_kimodo * kimodo_to_mujoco^T mujoco_to_kimodo_matrix = kimodo_to_mujoco_matrix.T root_rot_mujoco = torch.matmul( torch.matmul(kimodo_to_mujoco_matrix[None, None, ...], root_rot), mujoco_to_kimodo_matrix[None, None, ...], ) root_rot_quat = matrix_to_quaternion(root_rot_mujoco) # [w, x, y, z] if root_quat_w_first: qpos[:, :, 3:7] = root_rot_quat[:, :, [0, 1, 2, 3]] # [w, x, y, z] else: qpos[:, :, 3:7] = root_rot_quat[:, :, [1, 2, 3, 0]] # [w, x, y, z] -> [x, y, z, w] # Joint DOFs: raw angles or relative to rest (rest = q=0 in MuJoCo). 
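# Note: local_rot_mats was already premultiplied by _rot_offsets_f2q at the top of this method, so the hinge rotations selected below are in the f2q frame expected by _local_rots_f2q_to_joint_dofs.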
joint_rot_f2q = local_rot_mats[:, :, self._mujoco_indices_to_kimodo_indices, :, :] joint_dofs = self._local_rots_f2q_to_joint_dofs(joint_rot_f2q) if mujoco_rest_zero: rest_dofs = self._rest_dofs.to(device=device, dtype=dtype) qpos[:, :, 7:] = joint_dofs - rest_dofs[None, None, :] else: qpos[:, :, 7:] = joint_dofs return qpos def apply_g1_real_robot_projection( skeleton: G1Skeleton34, joints_pos: torch.Tensor, joints_rot: torch.Tensor, clamp_to_limits: bool = True, ) -> tuple[torch.Tensor, torch.Tensor]: """Project G1 motion to real robot DoF (1-DoF per joint) with optional axis limits. Extracts a single angle per hinge along its axis (1-DoF), optionally clamps to joint limits from the MuJoCo XML (when clamp_to_limits=True), then reconstructs 3D rotations and runs FK. T-pose (identity local rotations) is preserved. Args: skeleton: G1 skeleton instance. joints_pos: (T, J, 3) or (B, T, J, 3) joint positions in global space. joints_rot: (T, J, 3, 3) or (B, T, J, 3, 3) global rotation matrices. clamp_to_limits: If True, clamp joint angles to XML axis limits (default True). Returns: (posed_joints, global_rot_mats) as tensors, same shape as inputs (batch preserved). """ local_rot_mats = global_rots_to_local_rots(joints_rot, skeleton) root_positions = joints_pos[..., skeleton.root_idx, :] # Converter expects batch dim (B, T, ...); add and remove if single sequence. single_sequence = local_rot_mats.dim() == 4 if single_sequence: local_rot_mats = local_rot_mats.unsqueeze(0) root_positions = root_positions.unsqueeze(0) converter = MujocoQposConverter(skeleton) projected = converter.project_to_real_robot_rotations( local_rot_mats, root_positions, clamp_to_limits=clamp_to_limits ) out_pos = projected["posed_joints"] out_rot = projected["global_rot_mats"] if single_sequence: out_pos = out_pos.squeeze(0) out_rot = out_rot.squeeze(0) return out_pos, out_rot ================================================ FILE: kimodo/exports/smplx.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Convert kimodo motion to AMASS/SMPL-X compatible parameters (axis-angle, Y-up or Z-up).""" import os from typing import Optional import einops import numpy as np import torch from kimodo.assets import skeleton_asset_path from kimodo.geometry import axis_angle_to_matrix, matrix_to_axis_angle from kimodo.tools import ensure_batched, to_numpy, to_torch def kimodo_y_up_to_amass_coord_rotation_matrix() -> np.ndarray: """3x3 rotation mapping Kimodo Y-up (+Z forward) to AMASS Z-up (+Y forward). Used by :func:`get_amass_parameters` and :func:`amass_arrays_to_kimodo_motion` (inverse). """ y_up_to_z_up = np.array( [ [1.0, 0.0, 0.0], [0.0, 0.0, -1.0], [0.0, 1.0, 0.0], ], dtype=np.float32, ) rot_z_180 = np.array( [ [-1.0, 0.0, 0.0], [0.0, -1.0, 0.0], [0.0, 0.0, 1.0], ], dtype=np.float32, ) return np.matmul(rot_z_180, y_up_to_z_up).astype(np.float32) @ensure_batched(local_rot_mats=5, root_positions=3, lengths=1) def get_amass_parameters( local_rot_mats, root_positions, skeleton, z_up=True, ): """Convert local rot mats and root positions to AMASS-style trans and pose_body; optional z_up coordinate transform. Our method generates motions with Y-up and +Z forward; if z_up=True, transform to Z-up and +Y forward as in AMASS. 
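Returns: (trans, root_orient, pose_body): ``(B, T, 3)`` root translation, ``(B, T, 3)`` axis-angle root orientation, and ``(B, T, 63)`` flattened body pose (21 joints x 3).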
""" # Our method generate motions with Y-up and +Z forward # if z_up = True, we transform this to: Z-up with +Y forward, as in AMASS # Remove the root offset; SMPL-X FK adds pelvis offset back. pelvis_offset = skeleton.neutral_joints[skeleton.root_idx].cpu().numpy() trans = root_positions - pelvis_offset root_rot_mats = to_numpy(local_rot_mats[:, :, 0]) local_rot_axis_angle = to_numpy(matrix_to_axis_angle(to_torch(local_rot_mats))) pose_body = einops.rearrange(local_rot_axis_angle[:, :, 1:], "b t j d -> b t (j d)") # Optionally convert from Y-up to Z-up coordinates. if z_up: y_up_to_z_up = kimodo_y_up_to_amass_coord_rotation_matrix() root_rot_mats = np.matmul(y_up_to_z_up, root_rot_mats) trans = np.matmul(trans + pelvis_offset, y_up_to_z_up.T) - pelvis_offset root_orient = to_numpy(matrix_to_axis_angle(to_torch(root_rot_mats))) return trans, root_orient, pose_body def amass_arrays_to_kimodo_motion( trans: np.ndarray, root_orient: np.ndarray, pose_body: np.ndarray, skeleton, source_fps: float, *, z_up: bool = True, ): """Inverse of :func:`get_amass_parameters` for a single sequence (AMASS → Kimodo motion dict). Args: trans: ``(T, 3)`` AMASS root translation (same as ``trans`` in AMASS NPZ). root_orient: ``(T, 3)`` axis-angle root orientation in AMASS coordinates (z-up when ``z_up``). pose_body: ``(T, 63)`` body pose axis-angle (21 joints × 3). skeleton: :class:`~kimodo.skeleton.definitions.SMPLXSkeleton22` instance. source_fps: Source frame rate (Hz) of the AMASS recording. z_up: If ``True``, invert the same Y-up↔Z-up transform as ``get_amass_parameters(..., z_up=True)``. Returns: Motion dict compatible with :func:`kimodo.exports.motion_io.save_kimodo_npz`. """ from kimodo.exports.motion_io import complete_motion_dict trans = np.asarray(trans, dtype=np.float32) root_orient = np.asarray(root_orient, dtype=np.float32) pose_body = np.asarray(pose_body, dtype=np.float32) if trans.ndim != 2 or trans.shape[-1] != 3: raise ValueError(f"trans must be (T, 3); got {trans.shape}") if root_orient.shape != trans.shape: raise ValueError(f"root_orient shape {root_orient.shape} must match trans {trans.shape}") t = trans.shape[0] if pose_body.shape != (t, 63): raise ValueError(f"pose_body must be (T, 63); got {pose_body.shape}") pelvis_offset = skeleton.neutral_joints[skeleton.root_idx].detach().cpu().numpy().astype(np.float32) device = skeleton.neutral_joints.device dtype = torch.float32 Y_np = kimodo_y_up_to_amass_coord_rotation_matrix() if z_up: y_up_to_z_up = torch.from_numpy(Y_np).to(device=device, dtype=dtype) # trans_amass = root_kimodo @ Y.T - pelvis_offset => root_kimodo = (trans_amass + pelvis_offset) @ Y root_positions_np = (trans + pelvis_offset) @ Y_np else: root_positions_np = trans + pelvis_offset root_positions = torch.from_numpy(root_positions_np).to(device=device, dtype=dtype) R_amass_root = axis_angle_to_matrix(torch.from_numpy(root_orient).to(device=device, dtype=dtype)) if z_up: R_kimodo_root = torch.einsum("ij,tjk->tik", y_up_to_z_up.T, R_amass_root) else: R_kimodo_root = R_amass_root nb = skeleton.nbjoints if nb != 22: raise ValueError(f"Expected SMPL-X body skeleton with 22 joints; got {nb}") local_rot_mats = torch.zeros((t, nb, 3, 3), device=device, dtype=dtype) local_rot_mats[:, 0] = R_kimodo_root pose_aa = torch.from_numpy(pose_body.reshape(t, 21, 3)).to(device=device, dtype=dtype) local_rot_mats[:, 1:] = axis_angle_to_matrix(pose_aa.reshape(-1, 3)).reshape(t, 21, 3, 3) return complete_motion_dict(local_rot_mats, root_positions, skeleton, source_fps) def 
amass_npz_to_kimodo_motion(npz_path: str, skeleton, source_fps: Optional[float] = None, *, z_up: bool = True): """Load an AMASS-style ``.npz`` and return a Kimodo motion dict. Args: npz_path: Path to AMASS NPZ (``trans``, ``root_orient``, ``pose_body``, ...). skeleton: SMPL-X skeleton instance. source_fps: Source frame rate (Hz); if ``None``, uses ``mocap_frame_rate`` from the file when present, else ``30.0``. z_up: Same meaning as :func:`amass_arrays_to_kimodo_motion`. """ with np.load(npz_path, allow_pickle=True) as data: trans = np.asarray(data["trans"], dtype=np.float32) root_orient = np.asarray(data["root_orient"], dtype=np.float32) pose_body = np.asarray(data["pose_body"], dtype=np.float32) if source_fps is None: source_fps = float(data["mocap_frame_rate"]) if "mocap_frame_rate" in data.files else 30.0 return amass_arrays_to_kimodo_motion(trans, root_orient, pose_body, skeleton, source_fps, z_up=z_up) class AMASSConverter: def __init__( self, fps, skeleton, beta_path=str(skeleton_asset_path("smplx22", "beta.npy")), mean_hands_path=str(skeleton_asset_path("smplx22", "mean_hands.npy")), ): self.fps = fps self.skeleton = skeleton # Load betas if os.path.exists(beta_path): # only use first 16 betas to match AMASS betas = np.load(beta_path)[:16] else: betas = np.zeros(16) # Load mean hands if os.path.exists(mean_hands_path): mean_hands = np.load(mean_hands_path) else: mean_hands = np.zeros(90) self.default_frame_params = { "pose_jaw": np.zeros(3), "pose_eye": np.zeros(6), "pose_hand": mean_hands, } self.output_dict_base = { "gender": "neutral", "surface_model_type": "smplx", "betas": betas, "num_betas": len(betas), "mocap_frame_rate": float(fps), } def convert_save_npz(self, output: dict, npz_path, z_up=True): trans, root_orient, pose_body = get_amass_parameters( output["local_rot_mats"], output["root_positions"], self.skeleton, z_up=z_up, ) nb_frames = trans.shape[-2] amass_output_base = self.output_dict_base.copy() for key, val in self.default_frame_params.items(): amass_output_base[key] = einops.repeat(val, "d -> t d", t=nb_frames) amass_output_base["mocap_time_length"] = nb_frames / self.fps self.save_npz(trans, root_orient, pose_body, amass_output_base, npz_path) def save_npz(self, trans, root_orient, pose_body, base_output, npz_path): shape = trans.shape if len(shape) == 3 and shape[0] == 1: # if only one motion, squeeze the data trans = trans[0] root_orient = root_orient[0] pose_body = pose_body[0] shape = trans.shape if len(shape) == 2: amass_output = { "trans": trans, "root_orient": root_orient, "pose_body": pose_body, } | base_output np.savez(npz_path, **amass_output) elif len(shape) == 3: # real batch of motions npz_path_base, ext = os.path.splitext(npz_path) for i in range(shape[0]): npz_path_i = npz_path_base + "_" + str(i).zfill(2) + ext self.save_npz(trans[i], root_orient[i], pose_body[i], base_output, npz_path_i) # amass_output = { # "gender": "neutral", # "surface_model_type": "smplx", # "mocap_frame_rate": float(fps), # "mocap_time_length": len(motion) / float(fps) # "trans": trans, # "betas": betas, # "num_betas": len(betas), # "root_orient": np.array([T, 3]), # axis angle # "pose_body": np.array([T, 63]), # 63=21*3, axis angle 21 = 22 - root # "pose_hand": np.array([T, 90]), # 90=30*3=15*2*3 axis angle (load from mean_hands) # "pose_jaw": np.array([T, 3]), # all zeros is fine # "pose_eye": np.array([T, 6]), # all zeros is fine` # } ================================================ FILE: kimodo/geometry.py ================================================ # 
SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Rotation and representation conversions: axis-angle, quaternion, matrix, 6D continuous.""" import torch import torch.nn.functional as F def angle_to_Y_rotation_matrix(angle: torch.Tensor) -> torch.Tensor: """Build a rotation matrix around the Y axis from a scalar angle (radians). Shape: angle.shape + (3, 3). """ cos, sin = torch.cos(angle), torch.sin(angle) one, zero = torch.ones_like(angle), torch.zeros_like(angle) mat = torch.stack((cos, zero, sin, zero, one, zero, -sin, zero, cos), -1) mat = mat.reshape(angle.shape + (3, 3)) return mat def matrix_to_cont6d(matrix: torch.Tensor) -> torch.Tensor: """Convert rotation matrix to 6D continuous representation (first two columns). Shape: (..., 3, 3) -> (..., 6). """ cont_6d = torch.concat([matrix[..., 0], matrix[..., 1]], dim=-1) return cont_6d def cont6d_to_matrix(cont6d: torch.Tensor) -> torch.Tensor: """Convert 6D continuous representation to rotation matrix (Gram–Schmidt on two columns). Last dim must be 6. """ assert cont6d.shape[-1] == 6, "The last dimension must be 6" x_raw = cont6d[..., 0:3] y_raw = cont6d[..., 3:6] x = x_raw / torch.norm(x_raw, dim=-1, keepdim=True) z = torch.cross(x, y_raw, dim=-1) z = z / torch.norm(z, dim=-1, keepdim=True) y = torch.cross(z, x, dim=-1) x = x[..., None] y = y[..., None] z = z[..., None] mat = torch.cat([x, y, z], dim=-1) return mat def axis_angle_to_matrix(axis_angle: torch.Tensor) -> torch.Tensor: """Convert axis-angle to rotation matrix. Args: axis_angle: (..., 3) axis-angle vectors (angle = norm, axis = normalized) Returns: rotmat: (..., 3, 3) rotation matrices """ eps = 1e-6 angle = torch.norm(axis_angle, dim=-1, keepdim=True) # (..., 1) axis = axis_angle / (angle + eps) x, y, z = axis.unbind(-1) zero = torch.zeros_like(x) K = torch.stack([zero, -z, y, z, zero, -x, -y, x, zero], dim=-1).reshape(*axis.shape[:-1], 3, 3) eye = torch.eye(3, device=axis.device, dtype=axis.dtype) eye = eye.expand(*axis.shape[:-1], 3, 3) sin = torch.sin(angle)[..., None] cos = torch.cos(angle)[..., None] R = eye + sin * K + (1 - cos) * (K @ K) return R def matrix_to_axis_angle(R: torch.Tensor) -> torch.Tensor: """Convert rotation matrix to axis-angle via quaternions (more numerically stable). Args: R: (..., 3, 3) rotation matrices Returns: axis_angle: (..., 3) """ # Go through quaternions for numerical stability quat = matrix_to_quaternion(R) # (..., 4) with (w, x, y, z) return quaternion_to_axis_angle(quat) def quaternion_to_axis_angle(quat: torch.Tensor) -> torch.Tensor: """Convert quaternion to axis-angle representation. Args: quat: (..., 4) quaternions with real part first (w, x, y, z) Returns: axis_angle: (..., 3) """ eps = 1e-6 # Ensure canonical form to avoid sign ambiguity. # Primary: prefer w > 0. When w ≈ 0 (angle ≈ π), prefer first nonzero xyz > 0. 
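# q and -q encode the same rotation; without a canonical sign choice the returned axis-angle could flip direction between nearly identical inputs.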
w = quat[..., 0:1] xyz = quat[..., 1:] # Find first significant component of xyz for tie-breaking when w ≈ 0 first_significant = xyz[..., 0:1] # use x component as tie-breaker # Flip if: w < 0, OR (w ≈ 0 AND first xyz component < 0) should_flip = (w < -eps) | ((w.abs() <= eps) & (first_significant < 0)) quat = torch.where(should_flip, -quat, quat) w = quat[..., 0] xyz = quat[..., 1:] # sin(angle/2) = ||xyz|| sin_half_angle = xyz.norm(dim=-1) # angle = 2 * atan2(sin(angle/2), cos(angle/2)) # This is more stable than 2 * acos(w) near angle=0 angle = 2.0 * torch.atan2(sin_half_angle, w) # axis = xyz / sin(angle/2), but handle small angles # For small angles: axis-angle ≈ 2 * xyz (since sin(x) ≈ x for small x) small_angle = sin_half_angle.abs() < eps # Safe division scale = torch.where( small_angle, 2.0 * torch.ones_like(angle), # small angle: axis_angle ≈ 2 * xyz angle / sin_half_angle.clamp(min=eps), ) return xyz * scale.unsqueeze(-1) def _sqrt_positive_part(x: torch.Tensor) -> torch.Tensor: """Returns torch.sqrt(torch.max(0, x)) subgradient is zero where x is 0.""" return torch.sqrt(x * (x > 0).to(x.dtype)) def matrix_to_quaternion(matrix: torch.Tensor) -> torch.Tensor: """Convert rotations given as rotation matrices to quaternions. Args: matrix: Rotation matrices as tensor of shape (..., 3, 3). Returns: quaternions with real part first, as tensor of shape (..., 4). """ if matrix.size(-1) != 3 or matrix.size(-2) != 3: raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.") batch_dim = matrix.shape[:-2] m00, m01, m02, m10, m11, m12, m20, m21, m22 = torch.unbind(matrix.reshape(batch_dim + (9,)), dim=-1) q_abs = _sqrt_positive_part( torch.stack( [ 1.0 + m00 + m11 + m22, 1.0 + m00 - m11 - m22, 1.0 - m00 + m11 - m22, 1.0 - m00 - m11 + m22, ], dim=-1, ) ) quat_by_rijk = torch.stack( [ torch.stack([q_abs[..., 0] ** 2, m21 - m12, m02 - m20, m10 - m01], dim=-1), torch.stack([m21 - m12, q_abs[..., 1] ** 2, m10 + m01, m02 + m20], dim=-1), torch.stack([m02 - m20, m10 + m01, q_abs[..., 2] ** 2, m12 + m21], dim=-1), torch.stack([m10 - m01, m20 + m02, m21 + m12, q_abs[..., 3] ** 2], dim=-1), ], dim=-2, ) flr = torch.tensor(0.1).to(dtype=q_abs.dtype, device=q_abs.device) quat_candidates = quat_by_rijk / (2.0 * q_abs[..., None].max(flr)) return ( (F.one_hot(q_abs.argmax(dim=-1), num_classes=4)[..., None] * quat_candidates) .sum(dim=-2) .reshape(batch_dim + (4,)) ) def quaternion_to_matrix(quaternions: torch.Tensor) -> torch.Tensor: """Convert rotations given as quaternions to rotation matrices. Args: quaternions: quaternions with real part first, as tensor of shape (..., 4). Returns: Rotation matrices as tensor of shape (..., 3, 3). """ r, i, j, k = torch.unbind(quaternions, -1) two_s = 2.0 / (quaternions * quaternions).sum(-1) o = torch.stack( ( 1 - two_s * (j * j + k * k), two_s * (i * j - k * r), two_s * (i * k + j * r), two_s * (i * j + k * r), 1 - two_s * (i * i + k * k), two_s * (j * k - i * r), two_s * (i * k - j * r), two_s * (j * k + i * r), 1 - two_s * (i * i + j * j), ), -1, ) return o.reshape(quaternions.shape[:-1] + (3, 3)) ================================================ FILE: kimodo/meta.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Parse and normalize prompt text/duration data from meta dicts.""" import os from typing import Any, Optional from kimodo.tools import load_json from .sanitize import sanitize_text, sanitize_texts def load_prompts_from_meta(meta_path: str, **kwargs): """Load prompts from a meta dict or file. If fps is provided, the durations are converted to frames. Args: meta_path: Path to the meta file. **kwargs: Additional arguments to pass to parse_prompts_from_meta. Returns: texts: List of texts. durations: List of durations in seconds or frames. """ if not os.path.exists(meta_path): raise FileNotFoundError(f"meta.json not found in input folder: {meta_path}") meta = load_json(meta_path) return parse_prompts_from_meta(meta, **kwargs) def parse_prompts_from_meta( meta: dict[str, Any], fps: Optional[float] = None, sanitize: bool = False, ) -> tuple[list[str], list[float]]: """Parse prompt texts and durations from a meta dict into normalized lists. If fps is provided, the durations are converted to frames. Accepts either: - Single prompt: "text" (str) and "duration" (float) in seconds. - Multiple prompts: "texts" (list of str) and "durations" (list of float) in seconds. Returns: (texts, durations): texts as list of str, durations as list of float (seconds or frames). Lengths of both lists are equal. Raises: ValueError: If meta does not contain a recognized format. """ # Single prompt if "text" in meta and "duration" in meta: text = meta["text"] duration = float(meta["duration"]) if fps is not None: duration = int(duration * fps) if isinstance(text, list): raise ValueError("meta has 'text' but it is a list; use 'texts' for multiple prompts") if sanitize: text = sanitize_text(text) return ([text], [duration]) # Multiple prompts if "texts" in meta and "durations" in meta: texts = meta["texts"] durations = meta["durations"] if not isinstance(texts, list) or not isinstance(durations, list): raise ValueError("meta 'texts' and 'durations' must be lists") if len(texts) != len(durations): raise ValueError(f"meta 'texts' and 'durations' length mismatch: {len(texts)} vs {len(durations)}") durations = [float(d) for d in durations] if fps is not None: durations = [int(d * fps) for d in durations] if sanitize: texts = sanitize_texts(texts) return texts, durations raise ValueError("meta must contain either 'text' and 'duration', or 'texts' and 'durations'.") ================================================ FILE: kimodo/metrics/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Evaluation metrics for motion quality (foot skate, contact consistency, constraint following).""" from .base import ( Metric, aggregate_metrics, clear_metrics, compute_metrics, ) from .constraints import ContraintFollow from .foot_skate import ( FootContactConsistency, FootSkateFromContacts, FootSkateFromHeight, FootSkateRatio, ) from .tmr import ( TMR_EmbeddingMetric, TMR_Metric, compute_tmr_per_sample_retrieval, compute_tmr_retrieval_metrics, ) __all__ = [ "Metric", "ContraintFollow", "FootContactConsistency", "FootSkateFromContacts", "FootSkateFromHeight", "FootSkateRatio", "TMR_EmbeddingMetric", "TMR_Metric", "aggregate_metrics", "clear_metrics", "compute_metrics", "compute_tmr_per_sample_retrieval", "compute_tmr_retrieval_metrics", ] ================================================ FILE: kimodo/metrics/base.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Base metric class and batch/aggregate helpers.""" from __future__ import annotations from collections import defaultdict from typing import Dict, List import torch class Metric: """Base class for metrics that accumulate results over multiple __call__ and expose aggregate().""" def __init__(self, **kwargs): self.clear() def __call__(self, *args, **kwargs): """Compute metric for current batch, append to saved_metrics, and return the batch result.""" metrics = self._compute(*args, **kwargs) for key, val in metrics.items(): self.saved_metrics[key].append(val.detach().cpu().float()) return metrics def _compute(self, **kwargs): """Subclasses implement this to compute metric dict from batch inputs.""" raise NotImplementedError() def clear(self): """Reset all accumulated metric values.""" self.saved_metrics = defaultdict(list) def aggregate(self): """Return a dict of concatenated/stacked tensors over all accumulated batches.""" output = {} for key, lst in self.saved_metrics.items(): try: output[key] = torch.cat(lst) except RuntimeError: output[key] = torch.stack(lst) return output def compute_metrics(metrics_list: List[Metric], metrics_in: Dict) -> Dict: """Run each metric on metrics_in and return the combined dict of batch results.""" metrics_out = {} for metric in metrics_list: metrics_out.update(metric(**metrics_in)) return metrics_out def aggregate_metrics(metrics_list: List[Metric]) -> Dict: """Return combined aggregated results (concatenated over batches) for all metrics.""" metrics_out = {} for metric in metrics_list: metrics_out.update(metric.aggregate()) return metrics_out def clear_metrics(metrics_list: List[Metric]) -> None: """Clear accumulated values for all metrics in the list.""" for metric in metrics_list: metric.clear() ================================================ FILE: kimodo/metrics/constraints.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Constraint-following metrics.""" from __future__ import annotations from collections import defaultdict from typing import Dict, List, Optional import torch from torch import Tensor from kimodo.constraints import ( EndEffectorConstraintSet, FullBodyConstraintSet, Root2DConstraintSet, ) from kimodo.tools import ensure_batched from .base import Metric class ContraintFollow(Metric): """Constraint-following metric dispatcher for kimodo constraint sets.""" def __init__( self, skeleton, root_threshold: float = 0.10, **kwargs, ): super().__init__(**kwargs) self.skeleton = skeleton self.root_threshold = root_threshold @ensure_batched(posed_joints=4, constraints_lst=2, lengths=1) def _compute( self, posed_joints: Tensor, constraints_lst: Optional[List], lengths: Optional[Tensor] = None, **kwargs, ) -> Dict: if not constraints_lst: return {} root_idx = self.skeleton.root_idx output = defaultdict(list) for posed_joints_s, constraint_lst_s, lengths_s in zip(posed_joints, constraints_lst, lengths): output_seq = defaultdict(list) for constraint in constraint_lst_s: frame_idx = constraint.frame_indices.to(device=posed_joints_s.device, dtype=torch.long) if frame_idx.numel() == 0: continue assert frame_idx.max() < lengths_s, "The constraint is defined outside the length of the motion." if isinstance(constraint, Root2DConstraintSet): pred_root2d = posed_joints_s[frame_idx, root_idx][:, [0, 2]] target = constraint.smooth_root_2d.to(posed_joints_s.device) dist = torch.norm(pred_root2d - target, dim=-1) output_seq["constraint_root2d_err"].append(dist) hit = (dist <= self.root_threshold).float() output_seq["constraint_root2d_acc"].append(hit) elif isinstance(constraint, FullBodyConstraintSet): pred = posed_joints_s[frame_idx] target = constraint.global_joints_positions.to(posed_joints_s.device) err = torch.norm(pred - target, dim=-1) output_seq["constraint_fullbody_keyframe"].append(err) elif isinstance(constraint, EndEffectorConstraintSet): pos_idx = constraint.pos_indices.to(device=posed_joints_s.device, dtype=torch.long) pred = posed_joints_s[frame_idx].index_select(1, pos_idx) target = constraint.global_joints_positions.to(posed_joints_s.device).index_select(1, pos_idx) err = torch.norm(pred - target, dim=-1) output_seq["constraint_end_effector"].append(err) # in case the list contains several constraints of the same type for key, val in output_seq.items(): output[key].append(torch.cat(val).mean()) reduced = {} for key, vals in output.items(): reduced[key] = torch.stack(vals, dim=0) return reduced ================================================ FILE: kimodo/metrics/foot_skate.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 """Foot skate and contact consistency metrics.""" from __future__ import annotations from typing import Dict, Optional import torch from torch import Tensor from kimodo.motion_rep.feature_utils import compute_vel_xyz from kimodo.motion_rep.feet import foot_detect_from_pos_and_vel from kimodo.skeleton import SkeletonBase from kimodo.tools import ensure_batched from .base import Metric def get_four_contacts(fidx: list): if len(fidx) == 4: return fidx if len(fidx) == 6: # For soma77 # remove "LeftToeEnd" and "RightToeEnd" fidx = fidx[:2] + fidx[3:5] return fidx raise ValueError("Expects 4 or 6 foot joints (heel/toe per foot)") class FootSkateFromHeight(Metric): """When toe joint is near the floor, measures mean velocity of the toes.""" def __init__( self, skeleton: SkeletonBase, fps: float, height_thresh: float = 0.05, **kwargs, ): super().__init__(**kwargs) self.height_thresh = height_thresh self.skeleton = skeleton self.fps = fps @ensure_batched(posed_joints=4, lengths=1) def _compute( self, posed_joints: Tensor, lengths: Optional[Tensor] = None, **kwargs, ) -> Dict: fidx = self.skeleton.foot_joint_idx fidx = get_four_contacts(fidx) feet_pos = posed_joints[:, :, fidx] toe_pos = feet_pos[:, :, [1, 3]] toe_on_floor = (toe_pos[..., 1] < self.height_thresh)[:, :-1] # y-up [B, T, 2] where [left right] dt = 1.0 / self.fps toe_vel = torch.norm(toe_pos[:, 1:] - toe_pos[:, :-1], dim=-1) / dt # [B, nframes-1, 2] # compute err contact_toe_vel = toe_vel * toe_on_floor # vel when corresponding toe is on ground # account for generated length # since they are velocities use length-1 to avoid inaccurate vel going one frame past len device = toe_on_floor.device len_mask = torch.arange(toe_on_floor.shape[1], device=device)[None, :, None].expand(toe_on_floor.shape) < ( lengths[:, None, None] - 1 ) toe_on_floor = toe_on_floor * len_mask contact_toe_vel = contact_toe_vel * len_mask mean_vel = torch.sum(contact_toe_vel, (1, 2)) / (torch.sum(toe_on_floor, (1, 2)) + 1e-6) return {"foot_skate_from_height": mean_vel} class FootSkateFromContacts(Metric): """Measures velocity of the toes and ankles when predicted to be in contact.""" def __init__( self, skeleton: SkeletonBase, fps: float, **kwargs, ): super().__init__(**kwargs) self.skeleton = skeleton self.fps = fps @ensure_batched(posed_joints=4, foot_contacts=3, lengths=1) def _compute( self, posed_joints: Tensor, foot_contacts: Tensor, lengths: Optional[Tensor] = None, **kwargs, ) -> Dict: fidx = self.skeleton.foot_joint_idx fidx = get_four_contacts(fidx) feet_pos = posed_joints[:, :, fidx] dt = 1.0 / self.fps foot_vel = torch.norm(feet_pos[:, 1:] - feet_pos[:, :-1], dim=-1) / dt if foot_contacts.shape[-1] == 6: # For soma77 # remove "LeftToeEnd" and "RightToeEnd" foot_contacts = foot_contacts[..., [0, 1, 3, 4]] foot_contacts = foot_contacts[:, :-1] vel_err = foot_vel * foot_contacts # account for generated length # since they are velocities use length-1 to avoid inaccurate vel going one frame past len device = foot_contacts.device len_mask = torch.arange(foot_contacts.shape[1], device=device)[None, :, None].expand(foot_contacts.shape) < ( lengths[:, None, None] - 1 ) foot_contacts = foot_contacts * len_mask vel_err = vel_err * len_mask mean_vel = torch.sum(vel_err, (1, 2)) / (torch.sum(foot_contacts, (1, 2)) + 1e-6) # mean over contacting frames # Compute max velocity error across all feet and frames (per batch) max_vel = vel_err.amax(dim=(1, 2)) # [B] return { "foot_skate_from_pred_contacts": mean_vel, "foot_skate_max_vel": 
max_vel, } class FootSkateRatio(Metric): """Compute fraction of frames where the foot skates when it is on the ground. Inspired by GMD: https://github.com/korrawe/guided-motion-diffusion/blob/main/data_loaders/humanml/utils/metrics.py#L204 """ def __init__( self, skeleton: SkeletonBase, fps: float, height_thresh=0.05, vel_thresh=0.2, **kwargs, ): super().__init__(**kwargs) self.height_thresh = height_thresh self.vel_thresh = vel_thresh self.skeleton = skeleton self.fps = fps @ensure_batched(posed_joints=4, foot_contacts=3, lengths=1) def _compute( self, posed_joints: Tensor, foot_contacts: Tensor, lengths: Optional[Tensor] = None, **kwargs, ) -> Dict: fidx = self.skeleton.foot_joint_idx fidx = get_four_contacts(fidx) feet_pos = posed_joints[:, :, fidx] toe_pos = feet_pos[:, :, [1, 3]] toe_on_floor = toe_pos[..., 1] < self.height_thresh # y-up [B, T, 2] where [left right] # current and next frame on floor to consider it in contact toe_on_floor = torch.logical_and(toe_on_floor[:, :-1], toe_on_floor[:, 1:]) # [B, T-1, 2] dt = 1.0 / self.fps toe_vel = torch.norm(toe_pos[:, 1:] - toe_pos[:, :-1], dim=-1) / dt # [B, nframes-1, 2] # compute err contact_toe_vel = toe_vel * toe_on_floor # vel when corresponding toe is on ground # account for generated length # since they are velocities use length-1 to avoid inaccurate vel going one frame past len device = toe_on_floor.device len_mask = torch.arange(toe_on_floor.shape[1], device=device)[None, :, None].expand(toe_on_floor.shape) < ( lengths[:, None, None] - 1 ) toe_on_floor = toe_on_floor * len_mask contact_toe_vel = contact_toe_vel * len_mask # skating if velocity during contact > thresh toe_skate = contact_toe_vel > self.vel_thresh skate_ratio = torch.sum(toe_skate, (1, 2)) / (torch.sum(toe_on_floor, (1, 2)) + 1e-6) return {"foot_skate_ratio": skate_ratio} class FootContactConsistency(Metric): """Measures consistency between heuristic detected foot contacts (from height and velocity) and predicted foot contacts. i.e. accuracy of how well predicted matches heuristic. 
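Accuracy per sequence is 1 - mean XOR(heuristic, predicted), computed over the contact channels and the first ``lengths - 1`` frames.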
""" def __init__( self, skeleton: SkeletonBase, fps: float, vel_thresh: float = 0.15, height_thresh: float = 0.10, **kwargs, ): super().__init__(**kwargs) self.vel_thresh = vel_thresh self.height_thresh = height_thresh self.skeleton = skeleton self.fps = fps @ensure_batched(posed_joints=4, foot_contacts=3, lengths=1) def _compute( self, posed_joints: Tensor, foot_contacts: Tensor, lengths: Optional[Tensor] = None, **kwargs, ) -> Dict: velocity = compute_vel_xyz(posed_joints, float(self.fps), lengths=lengths) heuristic_contacts = foot_detect_from_pos_and_vel( posed_joints, velocity, self.skeleton, self.vel_thresh, self.height_thresh, ) if foot_contacts.shape[-1] == 6: # For soma77 # remove "LeftToeEnd" and "RightToeEnd" foot_contacts = foot_contacts[..., [0, 1, 3, 4]] num_contacts = foot_contacts.shape[-1] incorrect = torch.logical_xor(heuristic_contacts, foot_contacts) # account for generated length # since they are velocities, use length-1 to avoid inaccurate vel going one frame past len device = foot_contacts.device len_mask = torch.arange(foot_contacts.shape[1], device=device)[None, :, None].expand(foot_contacts.shape) < ( lengths[:, None, None] - 1 ) incorrect = incorrect * len_mask incorrect_ratio = torch.sum(incorrect, (1, 2)) / (num_contacts * (lengths - 1)) accuracy = 1 - incorrect_ratio return {"foot_contact_consistency": accuracy} ================================================ FILE: kimodo/metrics/tmr.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """TMR evaluation metrics: text-motion retrieval, R-Precision, and related scores.""" from __future__ import annotations from collections import defaultdict from typing import Any, Dict, List, Optional import numpy as np import torch from scipy import linalg from torch import Tensor from kimodo.model.tmr import TMR from .base import Metric # Scores are between 0 and 1 def get_score_matrix_unit(x, y): sim_matrix = np.einsum("b i, c i -> b c", x, y) scores = sim_matrix / 2 + 0.5 return scores def get_scores_unit(x, y): similarity = np.einsum("... i, ... i", x, y) scores = similarity / 2 + 0.5 return scores def compute_tmr_per_sample_retrieval( motion_emb: np.ndarray, text_emb: np.ndarray, sample_ids: List[str], texts: List[str], top_k: int = 5, ) -> List[Dict[str, Any]]: """For each sample (text query i), compute t2m rank of motion i and top-k retrieved motions with ids and texts. Returns list of dicts: [{"rank": int, "top_k": [{"id": str, "text": str}, ...]}, ...]. 
""" motion_emb = np.asarray(motion_emb).squeeze() text_emb = np.asarray(text_emb).squeeze() if motion_emb.ndim == 1: motion_emb = motion_emb[np.newaxis, :] if text_emb.ndim == 1: text_emb = text_emb[np.newaxis, :] n = motion_emb.shape[0] assert text_emb.shape[0] == n and len(sample_ids) == n and len(texts) == n scores = get_score_matrix_unit(text_emb, motion_emb) out: List[Dict[str, Any]] = [] for i in range(n): row = np.asarray(scores[i]) order = np.argsort(row)[::-1] rank = int(np.where(order == i)[0][0]) + 1 top_indices = order[:top_k] top_k_list = [{"id": sample_ids[j], "text": texts[j]} for j in top_indices] out.append({"rank": rank, "top_k": top_k_list}) return out class TMR_Metric(Metric): def __init__( self, tmr_model: TMR, ranks: List = [1, 2, 3, 5, 10], ranks_rounding=2, **kwargs, ): super().__init__(**kwargs) self.tmr_model = tmr_model self.ranks = ranks self.ranks_rounding = ranks_rounding def clear(self): self.saved_metrics = defaultdict(list) self.saved_text_latents = [] self.saved_motion_gen_latents = [] self.saved_motion_gt_latents = [] def _compute( self, motion_rep, pred_joints_output: Dict, gt_joints_output: Dict, text_x_dict: Dict, lengths: Tensor, **kwargs, ) -> Dict: pred_posed_joints = pred_joints_output["posed_joints"] original_skeleton = motion_rep.skeleton if motion_rep is not None else None latents_motion = self.tmr_model.encode_motion( pred_posed_joints, lengths=lengths, original_skeleton=original_skeleton, unit_vector=True, ) latents_motion = latents_motion.cpu().numpy() if isinstance(text_x_dict, dict) and "texts" in text_x_dict: latents_text = self.tmr_model.encode_raw_text(text_x_dict["texts"], unit_vector=True) else: latents_text = self.tmr_model.encode_text(text_x_dict, unit_vector=True) if latents_text.dim() == 1: latents_text = latents_text.unsqueeze(0) latents_text = latents_text.cpu().numpy() self.saved_text_latents.append(latents_text) self.saved_motion_gen_latents.append(latents_motion) scores_text = get_scores_unit(latents_motion, latents_text) output = {"TMR/t2m_sim": scores_text} if gt_joints_output is not None and "posed_joints" in gt_joints_output: gt_posed_joints = gt_joints_output["posed_joints"] gt_latents_motion = self.tmr_model.encode_motion( gt_posed_joints, lengths=lengths, original_skeleton=original_skeleton, unit_vector=True, ) gt_latents_motion = gt_latents_motion.cpu().numpy() self.saved_motion_gt_latents.append(gt_latents_motion) gt_scores_text = get_scores_unit(gt_latents_motion, latents_text) scores_motion = get_scores_unit(latents_motion, gt_latents_motion) output["TMR/t2m_gt_sim"] = gt_scores_text output["TMR/m2m_sim"] = scores_motion # pytorch tensors for key, val in output.items(): output[key] = torch.tensor(val) return output def aggregate(self): output = {} for key, lst in self.saved_metrics.items(): output[key] = np.concatenate(lst) assert self.saved_text_latents, "Should call the metric at least once." 
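# Concatenate all per-batch latents into (N, D) matrices; the retrieval and FID metrics below are computed over the full evaluation set rather than per batch.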
text_latents = np.concatenate(self.saved_text_latents) motion_gen_latents = np.concatenate(self.saved_motion_gen_latents) batch_size = len(text_latents) assert text_latents.shape == motion_gen_latents.shape scores_t2m = get_score_matrix_unit(text_latents, motion_gen_latents) scores_t2t = get_score_matrix_unit(text_latents, text_latents) t2m_metrics = contrastive_metrics( scores=scores_t2m, scores_t2t=scores_t2t, threshold=0.99, rounding=2, ) for key, val in t2m_metrics.items(): output["TMR/t2m_R/" + key] = val mu_gen, cov_gen = calculate_activation_statistics(motion_gen_latents) mu_text, cov_text = calculate_activation_statistics(text_latents) fid_gen_text = calculate_frechet_distance(mu_gen, cov_gen, mu_text, cov_text) output["TMR/FID/gen_text"] = fid_gen_text if self.saved_motion_gt_latents: motion_gt_latents = np.concatenate(self.saved_motion_gt_latents) assert motion_gt_latents.shape == motion_gen_latents.shape scores_m2gm = get_score_matrix_unit(motion_gen_latents, motion_gt_latents) scores_t2gm = get_score_matrix_unit(text_latents, motion_gt_latents) m2gm_metrics = contrastive_metrics( scores=scores_m2gm, scores_t2t=scores_t2t, threshold=0.99, rounding=2, ) for key, val in m2gm_metrics.items(): output["TMR/m2m_R/" + key] = val t2gm_metrics = contrastive_metrics( scores=scores_t2gm, scores_t2t=scores_t2t, threshold=0.99, rounding=2, ) for key, val in t2gm_metrics.items(): output["TMR/t2m_gt_R/" + key] = val mu_gt_motion, cov_gt_motion = calculate_activation_statistics(motion_gt_latents) fid_gen_motion = calculate_frechet_distance( mu_gen, cov_gen, mu_gt_motion, cov_gt_motion, ) output["TMR/FID/gen_gt"] = fid_gen_motion fid_gt_text = calculate_frechet_distance( mu_gt_motion, cov_gt_motion, mu_text, cov_text, ) output["TMR/FID/gt_text"] = fid_gt_text for key, val in output.items(): if isinstance(val, (int, float, np.integer, np.floating)): val = torch.tensor([val for _ in range(batch_size)]) if isinstance(val, np.ndarray): val = torch.from_numpy(val) output[key] = val.cpu().float() return output class TMR_EmbeddingMetric(Metric): """TMR metrics from precomputed motion and text embeddings (no model load). Use in the loop: pass motion_emb and text_emb per sample; aggregate() computes retrieval metrics. 
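Example (illustrative; ``samples`` stands for any iterable of dicts holding precomputed embeddings): ``metric = TMR_EmbeddingMetric()``; ``for s in samples: metric(motion_emb=s["motion_emb"], text_emb=s["text_emb"])``; ``results = metric.aggregate()`` then contains R-precision, MedR, and FID over all accumulated samples.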
""" def __init__(self, ranks_rounding: int = 2, **kwargs): super().__init__(**kwargs) self.ranks_rounding = ranks_rounding def clear(self): self.saved_metrics = defaultdict(list) self.saved_text_latents = [] self.saved_motion_gen_latents = [] self.saved_motion_gt_latents = [] def _compute( self, motion_emb=None, text_emb=None, gt_motion_emb=None, **kwargs, ) -> Dict: if motion_emb is None or text_emb is None: return {} motion_emb = np.asarray(motion_emb) text_emb = np.asarray(text_emb) if motion_emb.ndim == 1: motion_emb = motion_emb[np.newaxis, :] if text_emb.ndim == 1: text_emb = text_emb[np.newaxis, :] self.saved_text_latents.append(text_emb) self.saved_motion_gen_latents.append(motion_emb) if gt_motion_emb is not None: gt_motion_emb = np.asarray(gt_motion_emb) if gt_motion_emb.ndim == 1: gt_motion_emb = gt_motion_emb[np.newaxis, :] self.saved_motion_gt_latents.append(gt_motion_emb) scores = get_scores_unit(motion_emb, text_emb) return {"TMR/t2m_sim": torch.tensor(scores, dtype=torch.float32)} def aggregate(self): output = {} for key, lst in self.saved_metrics.items(): output[key] = np.concatenate(lst) if not self.saved_text_latents: return output text_latents = np.concatenate(self.saved_text_latents) motion_gen_latents = np.concatenate(self.saved_motion_gen_latents) batch_size = len(text_latents) assert text_latents.shape == motion_gen_latents.shape scores_t2m = get_score_matrix_unit(text_latents, motion_gen_latents) scores_t2t = get_score_matrix_unit(text_latents, text_latents) t2m_metrics = contrastive_metrics( scores=scores_t2m, scores_t2t=scores_t2t, threshold=0.99, rounding=self.ranks_rounding, ) for key, val in t2m_metrics.items(): output["TMR/t2m_R/" + key] = val if batch_size >= 2: mu_gen, cov_gen = calculate_activation_statistics(motion_gen_latents) mu_text, cov_text = calculate_activation_statistics(text_latents) output["TMR/FID/gen_text"] = calculate_frechet_distance(mu_gen, cov_gen, mu_text, cov_text) else: output["TMR/FID/gen_text"] = float("nan") if self.saved_motion_gt_latents: motion_gt_latents = np.concatenate(self.saved_motion_gt_latents) assert motion_gt_latents.shape == motion_gen_latents.shape scores_m2gm = get_score_matrix_unit(motion_gen_latents, motion_gt_latents) scores_t2gm = get_score_matrix_unit(text_latents, motion_gt_latents) m2gm_metrics = contrastive_metrics( scores=scores_m2gm, scores_t2t=scores_t2t, threshold=0.99, rounding=self.ranks_rounding, ) for key, val in m2gm_metrics.items(): output["TMR/m2m_R/" + key] = val t2gm_metrics = contrastive_metrics( scores=scores_t2gm, scores_t2t=scores_t2t, threshold=0.99, rounding=self.ranks_rounding, ) for key, val in t2gm_metrics.items(): output["TMR/t2m_gt_R/" + key] = val if batch_size >= 2: mu_gt_motion, cov_gt_motion = calculate_activation_statistics(motion_gt_latents) output["TMR/FID/gen_gt"] = calculate_frechet_distance(mu_gen, cov_gen, mu_gt_motion, cov_gt_motion) output["TMR/FID/gt_text"] = calculate_frechet_distance(mu_gt_motion, cov_gt_motion, mu_text, cov_text) else: output["TMR/FID/gen_gt"] = float("nan") output["TMR/FID/gt_text"] = float("nan") for key, val in output.items(): if isinstance(val, (int, float, np.integer, np.floating)): val = torch.tensor([val for _ in range(batch_size)]) if isinstance(val, np.ndarray): val = torch.from_numpy(val) output[key] = val.cpu().float() return output def compute_tmr_retrieval_metrics( motion_emb: np.ndarray, text_emb: np.ndarray, gt_motion_emb: Optional[np.ndarray] = None, rounding: int = 2, ) -> Dict[str, float]: """Compute TMR retrieval metrics from 
precomputed embeddings."""
    if motion_emb.shape != text_emb.shape:
        raise ValueError(f"Expected same shape for motion/text embeddings, got {motion_emb.shape} vs {text_emb.shape}")

    scores_t2m = get_score_matrix_unit(text_emb, motion_emb)
    scores_t2t = get_score_matrix_unit(text_emb, text_emb)

    output: Dict[str, float] = {}
    t2m_metrics = contrastive_metrics(
        scores=scores_t2m,
        scores_t2t=scores_t2t,
        threshold=0.99,
        rounding=rounding,
    )
    for key, val in t2m_metrics.items():
        output[f"TMR/t2m_R/{key}"] = float(val)

    n_samples = len(motion_emb)
    if n_samples >= 2:
        mu_gen, cov_gen = calculate_activation_statistics(motion_emb)
        mu_text, cov_text = calculate_activation_statistics(text_emb)
        output["TMR/FID/gen_text"] = float(calculate_frechet_distance(mu_gen, cov_gen, mu_text, cov_text))
    else:
        output["TMR/FID/gen_text"] = float("nan")

    if gt_motion_emb is not None:
        if gt_motion_emb.shape != motion_emb.shape:
            raise ValueError(f"Expected gt motion embeddings shape {motion_emb.shape}, got {gt_motion_emb.shape}")

        scores_m2gm = get_score_matrix_unit(motion_emb, gt_motion_emb)
        scores_t2gm = get_score_matrix_unit(text_emb, gt_motion_emb)

        m2gm_metrics = contrastive_metrics(
            scores=scores_m2gm,
            scores_t2t=scores_t2t,
            threshold=0.99,
            rounding=rounding,
        )
        for key, val in m2gm_metrics.items():
            output[f"TMR/m2m_R/{key}"] = float(val)

        t2gm_metrics = contrastive_metrics(
            scores=scores_t2gm,
            scores_t2t=scores_t2t,
            threshold=0.99,
            rounding=rounding,
        )
        for key, val in t2gm_metrics.items():
            output[f"TMR/t2m_gt_R/{key}"] = float(val)

        if n_samples >= 2:
            mu_gt_motion, cov_gt_motion = calculate_activation_statistics(gt_motion_emb)
            output["TMR/FID/gen_gt"] = float(calculate_frechet_distance(mu_gen, cov_gen, mu_gt_motion, cov_gt_motion))
            output["TMR/FID/gt_text"] = float(calculate_frechet_distance(mu_gt_motion, cov_gt_motion, mu_text, cov_text))
        else:
            output["TMR/FID/gen_gt"] = float("nan")
            output["TMR/FID/gt_text"] = float("nan")

    return output


def all_contrastive_metrics(sims, emb=None, threshold=None, rounding=2, return_cols=False):
    text_selfsim = None
    if emb is not None:
        text_selfsim = emb @ emb.T

    t2m_m, t2m_cols = contrastive_metrics(sims, text_selfsim, threshold, return_cols=True, rounding=rounding)
    m2t_m, m2t_cols = contrastive_metrics(sims.T, text_selfsim, threshold, return_cols=True, rounding=rounding)

    all_m = {}
    for key in t2m_m:
        all_m[f"t2m/{key}"] = t2m_m[key]
        all_m[f"m2t/{key}"] = m2t_m[key]

    all_m["t2m/len"] = float(len(sims))
    all_m["m2t/len"] = float(len(sims[0]))
    if return_cols:
        return all_m, t2m_cols, m2t_cols
    return all_m


def contrastive_metrics(
    scores,
    scores_t2t=None,
    threshold=None,
    rounding=2,
    return_cols=False,
):
    n, m = scores.shape
    assert n == m
    num_queries = n

    dists = -scores
    sorted_dists = np.sort(dists, axis=1)

    # GT is in the diagonal
    gt_dists = np.diag(dists)[:, None]

    if scores_t2t is not None and threshold is not None:
        real_threshold = 2 * threshold - 1
        idx = np.argwhere(scores_t2t > real_threshold)
        partition = np.unique(idx[:, 0], return_index=True)[1]
        # take as GT the minimum score of similar values
        gt_dists = np.minimum.reduceat(dists[tuple(idx.T)], partition)
        gt_dists = gt_dists[:, None]

    rows, cols = np.where((sorted_dists - gt_dists) == 0)  # find column position of GT

    # if there are ties
    if rows.size > num_queries:
        assert np.unique(rows).size == num_queries, "issue in metric evaluation"
        avg_cols = break_ties_average(sorted_dists, gt_dists)
        cols = avg_cols

    msg = "expected ranks to match queries ({} vs {})"
    assert cols.size == num_queries, msg.format(cols.size, num_queries)

    metrics = {}
    vals = [str(x).zfill(2) for x in [1, 2, 3, 5, 10]]
    for val in vals:
        metrics[f"R{val}"] = 100 * float(np.sum(cols < int(val))) / num_queries
    metrics["MedR"] = float(np.median(cols) + 1)
    metrics["len"] = num_queries

    if rounding is not None:
        for key in metrics:
            metrics[key] = round(metrics[key], rounding)

    if return_cols:
        return metrics, cols
    return metrics


def break_ties_average(sorted_dists, gt_dists):
    # fast implementation, based on this code:
    # https://stackoverflow.com/a/49239335
    locs = np.argwhere((sorted_dists - gt_dists) == 0)

    # Find the split indices
    steps = np.diff(locs[:, 0])
    splits = np.nonzero(steps)[0] + 1
    splits = np.insert(splits, 0, 0)

    # Compute the result columns
    summed_cols = np.add.reduceat(locs[:, 1], splits)
    counts = np.diff(np.append(splits, locs.shape[0]))
    avg_cols = summed_cols / counts
    return avg_cols


def calculate_activation_statistics(activations):
    """
    Params:
    -- activations: num_samples x dim_feat
    Returns:
    -- mu: dim_feat
    -- sigma: dim_feat x dim_feat
    """
    mu = np.mean(activations, axis=0)
    cov = np.cov(activations, rowvar=False)
    return mu, cov


def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance.

    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
    d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
    Stable version by Dougal J. Sutherland.

    Params:
    -- mu1   : Numpy array containing the activations of a layer of the
               inception net (like returned by the function 'get_predictions')
               for generated samples.
    -- mu2   : The sample mean over activations, precalculated on a
               representative dataset.
    -- sigma1: The covariance matrix over activations for generated samples.
    -- sigma2: The covariance matrix over activations, precalculated on a
               representative dataset.
    Returns:
    --       : The Frechet Distance.
    """
    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)

    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, "Training and test mean vectors have different lengths"
    assert sigma1.shape == sigma2.shape, "Training and test covariances have different dimensions"

    diff = mu1 - mu2

    # Product might be almost singular
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = ("fid calculation produces singular product; " "adding %s to diagonal of cov estimates") % eps
        print(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    # Numerical error might give slight imaginary component
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            # try again with a small offset added to the covariance diagonals
            offset = np.eye(sigma1.shape[0]) * eps
            covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
            if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
                m = np.max(np.abs(covmean.imag))
                raise ValueError("Imaginary component {}".format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)

    return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean
================================================
FILE: kimodo/model/__init__.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Kimodo model package: main model class, text encoders, and loading utilities."""

from .common import resolve_target
from .kimodo_model import Kimodo
from .llm2vec import LLM2VecEncoder
from .load_model import load_model
from .loading import (
    AVAILABLE_MODELS,
    DEFAULT_MODEL,
    DEFAULT_TEXT_ENCODER_URL,
    MODEL_NAMES,
    load_checkpoint_state_dict,
)
from .tmr import TMR
from .twostage_denoiser import TwostageDenoiser

__all__ = [
    "Kimodo",
    "LLM2VecEncoder",
    "TMR",
    "TwostageDenoiser",
    "load_model",
    "load_checkpoint_state_dict",
    "resolve_target",
    "AVAILABLE_MODELS",
    "DEFAULT_MODEL",
    "DEFAULT_TEXT_ENCODER_URL",
    "MODEL_NAMES",
]


================================================
FILE: kimodo/model/backbone.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Transformer backbone: padding, masking, and encoder stack for the denoiser."""

import logging
from typing import Optional, Union

import torch
from omegaconf import ListConfig
from pydantic.dataclasses import dataclass
from torch import Tensor, nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer

from kimodo.tools import validate

log = logging.getLogger(__name__)


def pad_x_and_mask_to_fixed_size(x: Tensor, mask: Tensor, size: int):
    """Pad a feature vector x and the mask to always have the same size.

    Args:
        x (torch.Tensor): [B, T, D]
        mask (torch.Tensor): [B, T]
        size (int)

    Returns:
        torch.Tensor: [B, size, D]
        torch.Tensor: [B, size]
    """
    batch_size, cur_max_size, dim = x.shape[0], x.shape[1], x.shape[2]
    if cur_max_size == size:
        # already padded to this size, probably in the collate function
        return x, mask
    if cur_max_size > size:
        # This issue should have been handled in the collate function
        # useful as a check for test time
        log.warning("The size of the tensor is larger than the maximum size. Cropping the input.")
        cur_max_size = size
        x = x[:, :cur_max_size]
        mask = mask[:, :cur_max_size]

    new_x = torch.zeros(
        (batch_size, size, dim),
        dtype=x.dtype,
        device=x.device,
    )
    new_x[:, :cur_max_size] = x

    # same for the mask
    new_mask = torch.zeros(
        (batch_size, size),
        dtype=mask.dtype,
        device=mask.device,
    )
    new_mask[:, :cur_max_size] = mask
    return new_x, new_mask


@dataclass(frozen=True, config=dict(extra="forbid", arbitrary_types_allowed=True))
class TransformerEncoderBlockConfig:
    """Configuration for the transformer encoder backbone."""

    # input features dimension
    input_dim: int
    # output features dimension
    output_dim: int
    # skeleton object
    skeleton: object
    # dimension of the text embeddings
    llm_shape: Union[list[int], ListConfig]
    # mask the text or not
    use_text_mask: bool
    # latent dimension of the model
    latent_dim: int
    # dimension of the feedforward network in transformer
    ff_size: int
    # num layers in transformer
    num_layers: int
    # num heads in transformer
    num_heads: int
    # activation in transformer
    activation: str
    # dropout rate for the transformer
    dropout: float
    # dropout rate for the positional embeddings
    pe_dropout: float
    # use norm first or not
    norm_first: bool = False
    # artificially extend the number of text tokens
    num_text_tokens_override: Optional[int] = None
    # Input first heading angle
    input_first_heading_angle: bool = False


class TransformerEncoderBlock(nn.Module):
    @validate(TransformerEncoderBlockConfig, save_args=True, super_init=True)
    def __init__(self, conf):
        self.nbjoints = self.skeleton.nbjoints

        llm_dim = self.llm_shape[-1]
        self.embed_text = nn.Linear(llm_dim, self.latent_dim)
        self.sequence_pos_encoder = PositionalEncoding(self.latent_dim, self.pe_dropout)

        # maximum number of tokens
        self.num_text_tokens = self.llm_shape[0]
        if self.num_text_tokens_override is not None:
            self.num_text_tokens = self.num_text_tokens_override

        self.embed_timestep = TimestepEmbedder(self.latent_dim, self.sequence_pos_encoder)
        self.input_linear = nn.Linear(self.input_dim, self.latent_dim)
        self.output_linear = nn.Linear(self.latent_dim, self.output_dim)
        self.linear_first_heading_angle = nn.Linear(2, self.latent_dim)

        trans_enc_layer = TransformerEncoderLayer(
            d_model=self.latent_dim,
            nhead=self.num_heads,
            dim_feedforward=self.ff_size,
            dropout=self.dropout,
            activation=self.activation,
            batch_first=True,
            norm_first=self.norm_first,
        )
        self.seqTransEncoder = TransformerEncoder(
            trans_enc_layer,
            num_layers=self.num_layers,
            enable_nested_tensor=False,
        )

    def forward(
        self,
        x: Tensor,
        x_pad_mask: torch.Tensor,
        text_feat: torch.Tensor,
        text_feat_pad_mask: torch.Tensor,
        timesteps: Tensor,
        first_heading_angle: Optional[Tensor] = None,
    ) -> Tensor:
        """
        Args:
            x (torch.Tensor): [B, T, dim_motion] current noisy motion
            x_pad_mask (torch.Tensor): [B, T] attention mask, positions with True are allowed to attend, False are not
            text_feat (torch.Tensor): [B, max_text_len, llm_dim] embedded text prompts
            text_feat_pad_mask (torch.Tensor): [B, max_text_len] attention mask, positions with True are allowed to attend, False are not
            timesteps (torch.Tensor): [B,] current denoising step
            first_heading_angle (Optional[torch.Tensor]): [B,] initial heading angle in radians; required when input_first_heading_angle is set

        Returns:
            torch.Tensor: [B, T, output_dim]
        """
        batch_size = len(x)

        x = self.input_linear(x)  # [B, T, D]

        # Pad the text tokens + mask to always have the same size == self.num_text_tokens
        # done here if it was not done in the collate function
        if self.num_text_tokens is not None:
            text_feat, text_feat_pad_mask = pad_x_and_mask_to_fixed_size(
                text_feat,
                text_feat_pad_mask,
                self.num_text_tokens,
            )

        # Encode the text features and the time information
        emb_text = self.embed_text(text_feat)  # [B, max_text_len, D]
        emb_time = self.embed_timestep(timesteps)  # [B, 1, D]

        # Create mask for the time information
        time_mask = torch.ones((batch_size, 1), dtype=torch.bool, device=x.device)

        # Create the prefix features (text, time, etc): [B, max_text_len + 1 + etc]
        prefix_feats = torch.cat((emb_text, emb_time), axis=1)

        # Behavior from old code: not use text mask -> True for all the tokens
        if not self.use_text_mask:
            text_feat_pad_mask = torch.ones(
                (batch_size, emb_text.shape[1]),
                dtype=torch.bool,
                device=x.device,
            )

        prefix_mask = torch.cat((text_feat_pad_mask, time_mask), axis=1)

        # add the input first heading angle
        if self.input_first_heading_angle:
            assert first_heading_angle is not None, "The first heading angle is mandatory for this model"
            # cos(angle) / sin(angle)
            first_heading_angle_feats = torch.stack(
                [
                    torch.cos(first_heading_angle),
                    torch.sin(first_heading_angle),
                ],
                axis=-1,
            )
            first_heading_angle_feats = self.linear_first_heading_angle(first_heading_angle_feats)
            first_heading_angle_feats = first_heading_angle_feats[:, None]  # for cat
            first_heading_angle_mask = torch.ones(
                (batch_size, 1),
                dtype=torch.bool,
                device=x.device,
            )
            prefix_feats = torch.cat((prefix_feats, first_heading_angle_feats), axis=1)
            prefix_mask = torch.cat((prefix_mask, first_heading_angle_mask), axis=1)

        # compute the number of prefix features
        pose_start_ind = prefix_feats.shape[1]

        # Concatenate prefix and x: [B, len(prefix) + T, D]
        xseq = torch.cat((prefix_feats, x), axis=1)

        # Concatenate the masks and negate them: [B, len(prefix) + T]
        src_key_padding_mask = ~torch.cat((prefix_mask, x_pad_mask), axis=1)

        # Add positional encoding
        xseq = self.sequence_pos_encoder(xseq)

        # Input to the transformer and keep the motion indexes
        if isinstance(self.seqTransEncoder, nn.TransformerEncoder):
            assert not self.seqTransEncoder.use_nested_tensor, "Flash attention should be disabled due to bug!"
        output = self.seqTransEncoder(
            xseq,
            src_key_padding_mask=src_key_padding_mask,
        )
        output = output[:, pose_start_ind:]  # [B, T, D]
        output = self.output_linear(output)  # [B, T, OD]
        return output


class PositionalEncoding(nn.Module):
    """Non-learned positional encoding."""

    def __init__(
        self,
        d_model: int,
        dropout: Optional[float] = 0.1,
        max_len: Optional[int] = 5000,
    ):
        """
        Args:
            d_model (int): input dim
            dropout (Optional[float] = 0.1): dropout probability on output
            max_len (Optional[int] = 5000): maximum sequence length
        """
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        # Note: we have to replace torch.exp() and math.log() with torch.pow()
        # because MKL exp() and ln() throw floating point exceptions on certain CPUs
        # see corresponding commit and MR
        div_term = torch.pow(10000.0, -torch.arange(0, d_model, 2).float() / d_model)
        # div_term = torch.exp(
        #     torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model)
        # )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)  # [1, T, D]

        self.register_buffer("pe", pe, persistent=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply positional encoding to input sequence.

        Args:
            x (torch.Tensor): [B, T, D] input motion sequence

        Returns:
            torch.Tensor: [B, T, D] input motion with PE added to it (and optionally dropout)
        """
        x = x + self.pe[:, : x.shape[1], :]
        return self.dropout(x)


class TimestepEmbedder(nn.Module):
    """Encoder for diffusion step."""

    def __init__(self, latent_dim: int, sequence_pos_encoder: PositionalEncoding):
        """
        Args:
            latent_dim (int): dim to encode to
            sequence_pos_encoder (PositionalEncoding): the PE to use on timesteps
        """
        super().__init__()
        self.latent_dim = latent_dim
        self.sequence_pos_encoder = sequence_pos_encoder

        time_embed_dim = self.latent_dim
        self.time_embed = nn.Sequential(
            nn.Linear(self.latent_dim, time_embed_dim),
            nn.SiLU(),
            nn.Linear(time_embed_dim, time_embed_dim),
        )

    def forward(self, timesteps: torch.Tensor) -> torch.Tensor:
        """Embed timesteps by looking up their positional encoding and passing it through an MLP.

        Args:
            timesteps (torch.Tensor): [B]

        Returns:
            torch.Tensor: [B, 1, D]
        """
        return self.time_embed(self.sequence_pos_encoder.pe.transpose(0, 1)[timesteps])


================================================
FILE: kimodo/model/cfg.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Classifier-free guidance wrapper for the denoiser at sampling time."""

from typing import Dict, Optional, Tuple, Union

import torch
import torch.nn as nn

CFG_TYPES = ["nocfg", "regular", "separated"]


class ClassifierFreeGuidedModel(nn.Module):
    """Wrapper around denoiser to use classifier-free guidance at sampling time."""

    def __init__(self, model: nn.Module, cfg_type: Optional[str] = "separated"):
        """Wrap the denoiser for classifier-free guidance; cfg_type in CFG_TYPES (e.g. 'regular', 'nocfg')."""
        super().__init__()
        self.model = model
        assert cfg_type in CFG_TYPES, f"Invalid cfg_type: {cfg_type}"
        self.cfg_type_default = cfg_type

    def forward(
        self,
        cfg_weight: Union[float, Tuple[float, float]],
        x: torch.Tensor,
        x_pad_mask: torch.Tensor,
        text_feat: torch.Tensor,
        text_feat_pad_mask: torch.Tensor,
        timesteps: torch.Tensor,
        first_heading_angle: Optional[torch.Tensor] = None,
        motion_mask: Optional[torch.Tensor] = None,
        observed_motion: Optional[torch.Tensor] = None,
        cfg_type: Optional[str] = None,
    ) -> torch.Tensor:
        """
        Args:
            cfg_weight (Union[float, Tuple[float, float]]): guidance weight, a single float, or a tuple of (text, constraint) weights when using separated cfg
            x (torch.Tensor): [B, T, dim_motion] current noisy motion
            x_pad_mask (torch.Tensor): [B, T] attention mask, positions with True are allowed to attend, False are not
            text_feat (torch.Tensor): [B, max_text_len, llm_dim] embedded text prompts
            text_feat_pad_mask (torch.Tensor): [B, max_text_len] attention mask, positions with True are allowed to attend, False are not
            timesteps (torch.Tensor): [B,] current denoising step
            first_heading_angle (Optional[torch.Tensor]): [B,] initial heading angle in radians
            motion_mask (Optional[torch.Tensor]): [B, T, dim_motion] binary mask marking which motion features are constrained
            observed_motion (Optional[torch.Tensor]): [B, T, dim_motion] observed (constraint) motion features used as conditioning

        Returns:
            torch.Tensor: same size as input x
        """
        if cfg_type is None:
            cfg_type = self.cfg_type_default
        assert cfg_type in CFG_TYPES, f"Invalid cfg_type: {cfg_type}"

        # batched conditional and uncond pass together
        if cfg_type == "nocfg":
            return self.model(
                x,
                x_pad_mask,
                text_feat,
                text_feat_pad_mask,
                timesteps,
                first_heading_angle=first_heading_angle,
                motion_mask=motion_mask,
                observed_motion=observed_motion,
            )
        elif cfg_type == "regular":
            assert isinstance(cfg_weight, (float, int)), "cfg_weight must be a single float for regular CFG"
            # out_uncond + w * (out_text_and_constraint -
out_uncond) text_feat = torch.concatenate([text_feat, 0 * text_feat], dim=0) if motion_mask is not None: motion_mask = torch.concatenate([motion_mask, 0 * motion_mask], dim=0) if observed_motion is not None: observed_motion = torch.concatenate([observed_motion, observed_motion], dim=0) if first_heading_angle is not None: first_heading_angle = torch.concatenate([first_heading_angle, first_heading_angle], dim=0) out_cond_uncond = self.model( torch.concatenate([x, x], dim=0), torch.concatenate([x_pad_mask, x_pad_mask], dim=0), text_feat, torch.concatenate([text_feat_pad_mask, False * text_feat_pad_mask], dim=0), torch.concatenate([timesteps, timesteps], dim=0), first_heading_angle=first_heading_angle, motion_mask=motion_mask, observed_motion=observed_motion, ) out, out_uncond = torch.chunk(out_cond_uncond, 2) out_new = out_uncond + (cfg_weight * (out - out_uncond)) elif cfg_type == "separated": assert len(cfg_weight) == 2, "cfg_weight must be a tuple of two floats for separated CFG" # out_uncond + w_text * (out_text - out_uncond) + w_constraint * (out_constraint - out_uncond) text_feat = torch.concatenate([text_feat, 0 * text_feat, 0 * text_feat], dim=0) if motion_mask is not None: motion_mask = torch.concatenate([0 * motion_mask, motion_mask, 0 * motion_mask], dim=0) if observed_motion is not None: observed_motion = torch.concatenate([observed_motion, observed_motion, observed_motion], dim=0) if first_heading_angle is not None: first_heading_angle = torch.concatenate( [first_heading_angle, first_heading_angle, first_heading_angle], dim=0, ) out_cond_uncond = self.model( torch.concatenate([x, x, x], dim=0), torch.concatenate([x_pad_mask, x_pad_mask, x_pad_mask], dim=0), text_feat, torch.concatenate( [ text_feat_pad_mask, False * text_feat_pad_mask, False * text_feat_pad_mask, ], dim=0, ), torch.concatenate([timesteps, timesteps, timesteps], dim=0), first_heading_angle=first_heading_angle, motion_mask=motion_mask, observed_motion=observed_motion, ) out_text, out_constraint, out_uncond = torch.chunk(out_cond_uncond, 3) out_new = ( out_uncond + (cfg_weight[0] * (out_text - out_uncond)) + (cfg_weight[1] * (out_constraint - out_uncond)) ) else: raise ValueError(f"Invalid cfg_type: {cfg_type}") return out_new ================================================ FILE: kimodo/model/common.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Config hydration: env vars, _target_ resolution, and recursive instantiation.""" import importlib import os def get_env_var(name: str, default=None): """Read env var by name and by lowercased name; return default if neither set.""" return os.getenv(name, os.getenv(name.lower(), default)) def resolve_target(target: str): """Import module and return the attribute named by a dotted path (e.g. 
'pkg.mod.Class').""" module_name, attr_name = target.rsplit(".", 1) module = importlib.import_module(module_name) return getattr(module, attr_name) def materialize_value(value): """Recursively turn dicts with '_target_' into instances; lists/dicts traversed; leaves unchanged.""" if isinstance(value, dict): if "_target_" in value: return instantiate_from_dict(value) return {k: materialize_value(v) for k, v in value.items()} if isinstance(value, list): return [materialize_value(v) for v in value] return value def instantiate_from_dict(node, overrides=None): """Build an instance from a config dict: '_target_' gives the class, other keys are kwargs; overrides merged in.""" if not isinstance(node, dict) or "_target_" not in node: raise ValueError("Config node must be a dict with a '_target_' key.") target = resolve_target(node["_target_"]) kwargs = {} for key, value in node.items(): if key == "_target_": continue kwargs[key] = materialize_value(value) if overrides: kwargs.update({k: v for k, v in overrides.items() if v is not None}) return target(**kwargs) ================================================ FILE: kimodo/model/diffusion.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Diffusion process and DDIM sampling for motion generation.""" import math from typing import Optional, Tuple import torch from torch import nn def get_beta_schedule( num_diffusion_timesteps: int, max_beta: Optional[float] = 0.999, ) -> torch.Tensor: """Get cosine beta schedule.""" def alpha_bar(t): return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 betas = [] for i in range(num_diffusion_timesteps): t1 = i / num_diffusion_timesteps t2 = (i + 1) / num_diffusion_timesteps betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) return torch.tensor(betas, dtype=torch.float) class Diffusion(torch.nn.Module): """Cosine-schedule diffusion process: betas, alphas, and DDIM step mapping.""" def __init__(self, num_base_steps: int): """Set up cosine beta schedule and precompute diffusion variables for num_base_steps.""" super().__init__() self.num_base_steps = num_base_steps betas_base = get_beta_schedule(self.num_base_steps) self.register_buffer("betas_base", betas_base, persistent=False) alphas_cumprod_base = torch.cumprod(1.0 - self.betas_base, dim=0) self.register_buffer("alphas_cumprod_base", alphas_cumprod_base, persistent=False) use_timesteps, _ = self.space_timesteps(self.num_base_steps) self.calc_diffusion_vars(use_timesteps) def extra_repr(self) -> str: return f"num_base_steps={self.num_base_steps}" @property def device(self): return self.betas_base.device def space_timesteps(self, num_denoising_steps: int) -> Tuple[torch.Tensor, torch.Tensor]: """Return (use_timesteps, map_tensor) for a subsampled denoising schedule of num_denoising_steps.""" nsteps_train = self.num_base_steps frac_stride = (nsteps_train - 1) / max(1, num_denoising_steps - 1) use_timesteps = torch.round(torch.arange(nsteps_train, device=self.device) * frac_stride).to(torch.long) use_timesteps = torch.clamp(use_timesteps, max=nsteps_train - 1) map_tensor = torch.arange(nsteps_train, device=self.device, dtype=torch.long)[use_timesteps] return use_timesteps, map_tensor def calc_diffusion_vars(self, use_timesteps: torch.Tensor) -> None: """Update buffers (betas, alphas, alphas_cumprod, etc.) 
for the given subsampled timesteps.""" alphas_cumprod = self.alphas_cumprod_base[use_timesteps] last_alpha_cumprod = torch.cat([torch.tensor([1.0]).to(alphas_cumprod), alphas_cumprod[:-1]]) betas = 1.0 - alphas_cumprod / last_alpha_cumprod self.register_buffer("betas", betas, persistent=False) alphas = 1.0 - self.betas self.register_buffer("alphas", alphas, persistent=False) alphas_cumprod = torch.cumprod(self.alphas, dim=0) alphas_cumprod = torch.clamp(alphas_cumprod, min=1e-9) self.register_buffer("alphas_cumprod", alphas_cumprod, persistent=False) alphas_cumprod_prev = torch.cat([torch.tensor([1.0]).to(self.alphas_cumprod), self.alphas_cumprod[:-1]]) self.register_buffer("alphas_cumprod_prev", alphas_cumprod_prev, persistent=False) sqrt_recip_alphas_cumprod = torch.rsqrt(self.alphas_cumprod) self.register_buffer("sqrt_recip_alphas_cumprod", sqrt_recip_alphas_cumprod, persistent=False) sqrt_recipm1_alphas_cumprod = torch.rsqrt(self.alphas_cumprod / (1.0 - self.alphas_cumprod)) self.register_buffer("sqrt_recipm1_alphas_cumprod", sqrt_recipm1_alphas_cumprod, persistent=False) posterior_variance = self.betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod) self.register_buffer("posterior_variance", posterior_variance, persistent=False) sqrt_alphas_cumprod = torch.rsqrt(1.0 / self.alphas_cumprod) self.register_buffer("sqrt_alphas_cumprod", sqrt_alphas_cumprod, persistent=False) sqrt_one_minus_alphas_cumprod = torch.rsqrt(1.0 / (1.0 - self.alphas_cumprod)) self.register_buffer( "sqrt_one_minus_alphas_cumprod", sqrt_one_minus_alphas_cumprod, persistent=False, ) def q_sample( self, x_start: torch.Tensor, t: torch.Tensor, noise: torch.Tensor = None, ): if noise is None: noise = torch.randn_like(x_start) assert noise.shape == x_start.shape xt = ( self.sqrt_alphas_cumprod[t, None, None] * x_start + self.sqrt_one_minus_alphas_cumprod[t, None, None] * noise ) return xt class DDIMSampler(nn.Module): """Deterministic DDIM sampler (eta = 0).""" def __init__(self, diffusion: Diffusion): super().__init__() self.diffusion = diffusion def __call__( self, use_timesteps: torch.Tensor, x_t: torch.Tensor, pred_xstart: torch.Tensor, t: torch.Tensor, ) -> torch.Tensor: self.diffusion.calc_diffusion_vars(use_timesteps) eps = ( self.diffusion.sqrt_recip_alphas_cumprod[t, None, None] * x_t - pred_xstart ) / self.diffusion.sqrt_recipm1_alphas_cumprod[t, None, None] alpha_bar_prev = self.diffusion.alphas_cumprod_prev[t, None, None] x = pred_xstart * torch.sqrt(alpha_bar_prev) + torch.sqrt(1 - alpha_bar_prev) * eps return x ================================================ FILE: kimodo/model/kimodo_model.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0
"""Kimodo model: denoiser, text encoder, diffusion sampling, and post-processing."""

import logging
from typing import Dict, List, Optional, Tuple, Union

import torch
from torch import nn
from tqdm.auto import tqdm

from kimodo.constraints import EndEffectorConstraintSet, FullBodyConstraintSet
from kimodo.motion_rep.feature_utils import compute_heading_angle, length_to_mask
from kimodo.postprocess import post_process_motion
from kimodo.sanitize import sanitize_texts
from kimodo.skeleton import SOMASkeleton30
from kimodo.tools import to_numpy

from .cfg import ClassifierFreeGuidedModel
from .diffusion import DDIMSampler, Diffusion

log = logging.getLogger(__name__)


class Kimodo(nn.Module):
    """Inference-time helper: wraps the denoiser with classifier-free guidance and runs diffusion sampling."""

    def __init__(
        self,
        denoiser: nn.Module,
        text_encoder: nn.Module,
        num_base_steps: int,
        device: Optional[Union[str, torch.device]] = None,
        cfg_type: Optional[str] = "separated",
    ):
        super().__init__()
        self.denoiser = denoiser.eval()

        if cfg_type is None:
            cfg_type = "nocfg"

        # Add Classifier-free guidance to the model if needed
        self.denoiser = ClassifierFreeGuidedModel(self.denoiser, cfg_type=cfg_type)

        self.motion_rep = denoiser.motion_rep
        self.skeleton = self.motion_rep.skeleton
        self.fps = denoiser.motion_rep.fps

        self.diffusion = Diffusion(num_base_steps=num_base_steps)
        self.sampler = DDIMSampler(self.diffusion)
        self.text_encoder = text_encoder
        self.device = device
        self.to(device)

    @property
    def output_skeleton(self):
        """Skeleton used for model output (somaskel77 for SOMA, else unchanged)."""
        if isinstance(self.skeleton, SOMASkeleton30):
            return self.skeleton.somaskel77
        return self.skeleton

    def train(self, mode: bool = True):
        self.denoiser.train(mode)
        return self

    def eval(self):
        self.denoiser.eval()
        return self

    def denoising_step(
        self,
        motion: torch.Tensor,
        pad_mask: torch.Tensor,
        text_feat: torch.Tensor,
        text_pad_mask: torch.Tensor,
        t: torch.Tensor,
        first_heading_angle: Optional[torch.Tensor],
        motion_mask: torch.Tensor,
        observed_motion: torch.Tensor,
        num_denoising_steps: torch.Tensor,
        cfg_weight: Union[float, Tuple[float, float]],
        guide_masks: Optional[Dict] = None,
        cfg_type: Optional[str] = None,
    ) -> torch.Tensor:
        """Single denoising step.

        Returns:
            torch.Tensor: [B, T, D] noisy motion input to step t-1
        """
        # subsample timesteps
        # NOTE: do this at every step due to ONNX export, i.e. num_denoising_steps may change
        # dynamically when running the onnx version, so we need to account for that.
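        # Illustrative numbers (hypothetical): with num_base_steps=1000 and
        # num_denoising_steps=50, frac_stride is 999/49 ~= 20.4, so subsampled
        # steps t = 0, 1, 2, ..., 49 map to base timesteps ~0, 20, 41, ..., 999;
        # map_tensor[t] below selects that base-schedule index for the denoiser.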
        num_denoising_steps = num_denoising_steps[0]
        use_timesteps, map_tensor = self.diffusion.space_timesteps(num_denoising_steps)
        self.diffusion.calc_diffusion_vars(use_timesteps)

        # first compute initial clean prediction from denoiser
        t_map = map_tensor[t]
        with torch.inference_mode():
            pred_clean = self.denoiser(
                cfg_weight,
                motion,
                pad_mask,
                text_feat,
                text_pad_mask,
                t_map,
                first_heading_angle,
                motion_mask,
                observed_motion,
                cfg_type=cfg_type,
            )

        # sampler computes next step noisy motion
        x_tm1 = self.sampler(use_timesteps, motion, pred_clean, t)
        return x_tm1

    def _multiprompt(
        self,
        prompts: list[str],
        num_frames: int | list[int],
        num_denoising_steps: int,
        constraint_lst: Optional[list] = [],
        cfg_weight: Optional[float] = [2.0, 2.0],
        num_samples: Optional[int] = None,
        cfg_type: Optional[str] = None,
        return_numpy: bool = False,
        first_heading_angle: Optional[torch.Tensor] = None,
        # for transitioning
        num_transition_frames: int = 5,
        # for postprocess
        post_processing: bool = False,
        root_margin: float = 0.04,
        # progress bar
        progress_bar=tqdm,
    ) -> torch.Tensor:
        device = self.device

        texts = sanitize_texts(prompts)

        tosqueeze = False
        if num_samples is None:
            num_samples = 1
            tosqueeze = True
        bs = num_samples

        if isinstance(num_frames, int):
            # same duration for all the segments
            num_frames = [num_frames for _ in range(len(texts))]

        if constraint_lst is None:
            constraint_lst = []

        # Generate one chunk at a time
        current_frame = 0
        generated_motions = []
        for idx, (text, num_frame) in enumerate(zip(texts, num_frames)):
            texts_bs = [text for _ in range(num_samples)]
            lengths = torch.tensor(
                [num_frame for _ in range(num_samples)],
                device=device,
            )
            is_first_motion = not generated_motions
            observed_motion, motion_mask = None, None

            # filter the constraint_lst to only keep the relevant ones
            constraint_lst_base = [
                constraint.crop_move(current_frame, current_frame + num_frame) for constraint in constraint_lst
            ]  # this moves them temporally but not spatially
            observed_motion, motion_mask = self.motion_rep.create_conditions_from_constraints_batched(
                constraint_lst_base,
                lengths,
                to_normalize=False,  # don't normalize yet, it needs to be moved around
                device=device,
            )

            if not is_first_motion:
                nb_transition_frames = num_transition_frames
                if nb_transition_frames < 1:
                    raise ValueError(f"num_transition_frames must be at least 1, got {nb_transition_frames}")

                latest_motions = generated_motions.pop()
                # remove the transition part of A (will be put back afterward)
                generated_motions.append(latest_motions[:, :-nb_transition_frames])
                latest_frames = latest_motions[:, -nb_transition_frames:]

                last_output = self.motion_rep.inverse(
                    latest_frames,
                    is_normalized=False,
                    return_numpy=False,
                )
                smooth_root_2d = last_output["smooth_root_pos"][..., [0, 2]]

                # add constraints at the beginning to allow natural transitions
                constraint_lst_transition = []
                for batch_id in range(bs):
                    new_constraint = FullBodyConstraintSet(
                        self.skeleton,
                        torch.arange(num_transition_frames),
                        last_output["posed_joints"][batch_id, :num_transition_frames],
                        last_output["global_rot_mats"][batch_id, :num_transition_frames],
                        smooth_root_2d[batch_id, :num_transition_frames],
                    )
                    # separate end-effector constraint to capture hand/feet rotations
                    new_ee_constraint = EndEffectorConstraintSet(
                        self.skeleton,
                        torch.arange(num_transition_frames),
                        last_output["posed_joints"][batch_id, :num_transition_frames],
                        last_output["global_rot_mats"][batch_id, :num_transition_frames],
                        smooth_root_2d[batch_id, :num_transition_frames],
                        joint_names=["LeftHand", "RightHand", "LeftFoot", "RightFoot"],
                    )
                    constraint_lst_transition.append([new_constraint, new_ee_constraint])
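                # Conditioning sketch (illustrative): with num_transition_frames=5,
                # the first 5 frames of the new segment are constrained to replay the
                # last 5 frames of the previous segment (full-body pose plus hand/foot
                # end-effectors), so the denoiser inpaints a continuation that starts
                # exactly where the previous chunk ended.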
                transition_lengths = torch.tensor(
                    [nb_transition_frames for _ in range(num_samples)],
                    device=device,
                )
                observed_motion_transition, motion_mask_transition = (
                    self.motion_rep.create_conditions_from_constraints_batched(
                        constraint_lst_transition,
                        transition_lengths,
                        to_normalize=False,  # don't normalize yet
                        device=device,
                    )
                )

                # concatenate the observed motion / motion mask
                observed_motion = torch.cat([observed_motion_transition, observed_motion], axis=1)
                motion_mask = torch.cat([motion_mask_transition, motion_mask], axis=1)

                # we need to move each observed motion in the batch to the new starting points
                last_smooth_root_2d = smooth_root_2d[:, 0]
                observed_motion = self.motion_rep.translate_2d(
                    observed_motion, -last_smooth_root_2d
                )  # equivalent to: self.motion_rep.translate_2d_to_zero(observed_motion)

                # remove dummy values after moving
                observed_motion = observed_motion * motion_mask
                lengths = lengths + transition_lengths
                first_heading_angle = compute_heading_angle(last_output["posed_joints"], self.skeleton)[:, 0]
            else:
                if first_heading_angle is None:
                    # Start at 0 angle, but this will change afterward
                    first_heading_angle = torch.tensor([0.0] * bs, device=device)
                else:
                    first_heading_angle = torch.as_tensor(first_heading_angle, device=device)
                    if first_heading_angle.numel() == 1:
                        first_heading_angle = first_heading_angle.repeat(bs)

            observed_motion = self.motion_rep.normalize(observed_motion)

            max_frames = max(lengths)
            motion_pad_mask = length_to_mask(lengths)

            motion = self._generate(
                texts_bs,
                max_frames,
                num_denoising_steps=num_denoising_steps,
                pad_mask=motion_pad_mask,
                first_heading_angle=first_heading_angle,
                motion_mask=motion_mask,
                observed_motion=observed_motion,
                cfg_weight=cfg_weight,
                cfg_type=cfg_type,
            )
            motion = self.motion_rep.unnormalize(motion)

            if not is_first_motion:
                motion_with_transition = self.motion_rep.translate_2d(
                    motion,
                    last_smooth_root_2d,
                )
                if post_processing:
                    # Per-segment postprocessing: inverse, postprocess, re-encode.
                    # The full transition+segment is postprocessed together so the
                    # transition constraints keep the junction smooth.
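                    # Window arithmetic (illustrative, hypothetical values): if
                    # current_frame=120, num_frame=90, and nb_transition_frames=5, the
                    # user constraints below are re-cropped to the absolute window
                    # [115, 210) so their frame indices line up with the
                    # transition+segment tensor that gets post-processed as one piece.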
seg_output = self.motion_rep.inverse( motion_with_transition, is_normalized=False, return_numpy=False, ) seg_constraints = [list(cl) for cl in constraint_lst_transition] for bi in range(bs): seg_constraints[bi].extend( [c.crop_move(current_frame - nb_transition_frames, current_frame - nb_transition_frames + num_frame + nb_transition_frames) for c in constraint_lst] ) corrected = post_process_motion( seg_output["local_rot_mats"], seg_output["root_positions"], seg_output["foot_contacts"], self.skeleton, seg_constraints, root_margin=root_margin, ) seg_output.update(corrected) motion = self.motion_rep( seg_output["local_rot_mats"], seg_output["root_positions"], to_normalize=False, lengths=lengths, ) else: motion = motion_with_transition[:, num_transition_frames:] transition_frames = motion_with_transition[:, :num_transition_frames] # linearly combine the previously generated transitions with the newly generated ones alpha = torch.linspace(1, 0, num_transition_frames, device=device)[:, None] new_transition_frames = ( latest_frames[:, :num_transition_frames] * alpha + (1 - alpha) * transition_frames ) # add new transitions frames for A (merging with B prediction of the history) generated_motions.append(new_transition_frames) elif post_processing: # First segment: postprocess immediately seg_output = self.motion_rep.inverse( motion, is_normalized=False, return_numpy=False, ) seg_constraints = constraint_lst_base if constraint_lst_base else [] corrected = post_process_motion( seg_output["local_rot_mats"], seg_output["root_positions"], seg_output["foot_contacts"], self.skeleton, seg_constraints, root_margin=root_margin, ) seg_output.update(corrected) motion = self.motion_rep( seg_output["local_rot_mats"], seg_output["root_positions"], to_normalize=False, lengths=lengths, ) generated_motions.append(motion) current_frame += num_frame generated_motions = torch.cat(generated_motions, axis=1) # temporal axis (b, t, d) if tosqueeze: generated_motions = generated_motions[0] output = self.motion_rep.inverse( generated_motions, is_normalized=False, return_numpy=False, ) # Post-processing: already applied per-segment inside the loop above, # so no additional post-processing pass is needed here. # Convert SOMA output to somaskel77 for external API if isinstance(self.skeleton, SOMASkeleton30): output = self.skeleton.output_to_SOMASkeleton77(output) # Convert to numpy if requested if return_numpy: output = to_numpy(output) return output def __call__( self, prompts: str | list[str], num_frames: int | list[int], num_denoising_steps: int, multi_prompt: bool = False, constraint_lst: Optional[list] = [], cfg_weight: Optional[float] = [2.0, 2.0], num_samples: Optional[int] = None, cfg_type: Optional[str] = None, return_numpy: bool = False, first_heading_angle: Optional[torch.Tensor] = None, # for transitioning num_transition_frames: int = 5, # for postprocess post_processing: bool = False, root_margin: float = 0.04, # progress bar progress_bar=tqdm, ) -> dict: """Generate motion from text prompts and optional kinematic constraints. When a single prompt/num_frames pair is given, one motion is generated. Passing lists of prompts and/or num_frames produces a batch of independent motions. With ``multi_prompt=True``, the prompts are treated as sequential segments that are generated and stitched together with smooth transitions. Args: prompts: One or more text descriptions of the desired motion. A single string generates one sample; a list generates a batch (or sequential segments when ``multi_prompt=True``). 
num_frames: Duration of the generated motion in frames. Can be a single int applied to every prompt or a per-prompt list. num_denoising_steps: Number of DDIM denoising steps. More steps generally improve quality at the cost of speed. multi_prompt: If ``True``, treat ``prompts`` as an ordered sequence of segments and concatenate them with transitions. constraint_lst: Per-sample list of kinematic constraints (e.g. keyframe poses, end-effector targets, 2-D paths). Pass an empty list for unconstrained generation. cfg_weight: Classifier-free guidance scale(s). A two-element list ``[text_cfg, constraint_cfg]`` controls text and constraint guidance independently. num_samples: Number of samples to generate. cfg_type: Override the default CFG strategy set at init (e.g. ``"separated"``). return_numpy: If ``True``, convert all output tensors to numpy arrays. first_heading_angle: Initial body heading in radians. Shape ``(B,)`` or scalar. Defaults to ``0`` (facing +Z). num_transition_frames: Number of overlapping frames used to blend consecutive segments in multi-prompt mode. post_processing: If ``True``, apply post-processing (foot-skate cleanup and constraint enforcement). root_margin: Horizontal margin (in meters) used by the post-processor to determine when to correct root motion. When root deviates more than margin from the constraint, the post-processor will correct it. progress_bar: Callable wrapping an iterable to display progress (default: ``tqdm``). Pass a no-op to silence output. Returns: dict: A dictionary of motion tensors (or numpy arrays if ``return_numpy=True``) with the following keys: - ``local_rot_mats`` – Local joint rotations as rotation matrices. - ``global_rot_mats`` – Global joint rotations as rotation matrices. - ``posed_joints`` – Joint positions in world space. - ``root_positions`` – Root joint positions. - ``smooth_root_pos`` – Smoothed root trajectory. - ``foot_contacts`` – Boolean foot-contact labels [left heel, left toe, right heel, right toe]. - ``global_root_heading`` – Root heading angle over time. """ device = self.device if multi_prompt: # multi prompt generation return self._multiprompt( prompts, num_frames, num_denoising_steps, constraint_lst, cfg_weight, num_samples, cfg_type, return_numpy, first_heading_angle, num_transition_frames, post_processing, root_margin, progress_bar, ) # Input checking tosqueeze = False if isinstance(prompts, list) and isinstance(num_frames, list): assert len(prompts) == len(num_frames), "The number of prompts should match the number of num_frames." 
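            # Broadcasting rules (illustrative calls, hypothetical values):
            #   model(["walk", "run"], [120, 90], 50)  -> batch of 2, per-prompt lengths
            #   model(["walk", "run"], 120, 50)        -> batch of 2, both 120 frames
            #   model("walk", [120, 90], 50)           -> batch of 2, same prompt twice
            #   model("walk", 120, 50)                 -> one sample, batch dim squeezed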
num_samples = len(prompts) elif isinstance(prompts, list): num_samples = len(prompts) num_frames = [num_frames for _ in range(num_samples)] elif isinstance(num_frames, list): num_samples = len(num_frames) prompts = [prompts for _ in range(num_samples)] else: if num_samples is None: tosqueeze = True num_samples = 1 prompts = [prompts for _ in range(num_samples)] num_frames = [num_frames for _ in range(num_samples)] bs = num_samples texts = sanitize_texts(prompts) lengths = torch.tensor( num_frames, device=device, ) max_frames = max(lengths) motion_pad_mask = length_to_mask(lengths) if first_heading_angle is None: # Start at 0 angle first_heading_angle = torch.tensor([0.0] * bs, device=device) else: first_heading_angle = torch.as_tensor(first_heading_angle, device=device) if first_heading_angle.numel() == 1: first_heading_angle = first_heading_angle.repeat(bs) observed_motion, motion_mask = None, None if constraint_lst: observed_motion, motion_mask = self.motion_rep.create_conditions_from_constraints_batched( constraint_lst, lengths, to_normalize=True, device=device, ) motion = self._generate( texts, max_frames, num_denoising_steps=num_denoising_steps, pad_mask=motion_pad_mask, first_heading_angle=first_heading_angle, motion_mask=motion_mask, observed_motion=observed_motion, cfg_weight=cfg_weight, cfg_type=cfg_type, progress_bar=progress_bar, ) if tosqueeze: motion = motion[0] output = self.motion_rep.inverse( motion, is_normalized=True, return_numpy=False, # Keep as tensor for potential post-processing ) # Apply post-processing if requested if post_processing: corrected = post_process_motion( output["local_rot_mats"], output["root_positions"], output["foot_contacts"], self.skeleton, constraint_lst, root_margin=root_margin, ) # key frame outputs / foot contacts are not changed output.update(corrected) # Convert SOMA output to somaskel77 for external API if isinstance(self.skeleton, SOMASkeleton30): output = self.skeleton.output_to_SOMASkeleton77(output) # Convert to numpy if requested if return_numpy: output = to_numpy(output) return output def _generate( self, texts: List[str], max_frames: int, num_denoising_steps: int, pad_mask: torch.Tensor, first_heading_angle: Optional[torch.Tensor], motion_mask: torch.Tensor, observed_motion: torch.Tensor, cfg_weight: Optional[float] = 2.0, text_feat: Optional[torch.Tensor] = None, text_pad_mask: Optional[torch.Tensor] = None, guide_masks: Optional[Dict] = None, cfg_type: Optional[str] = None, progress_bar=tqdm, ) -> torch.Tensor: """Sample full denoising loop. 
Args: texts (List[str]): batch of text prompts to use for sampling (if text_feat is not passed in) """ device = self.device if text_feat is None: assert text_pad_mask is None log.info("Encoding text...") text_feat, text_length = self.text_encoder(texts) text_feat = text_feat.to(device) # handle empty string (set to zero) empty_text_mask = [len(text.strip()) == 0 for text in texts] text_feat[empty_text_mask] = 0 # Create the pad mask for the text batch_size, maxlen = text_feat.shape[:2] tensor_text_length = torch.tensor(text_length, device=device) tensor_text_length[empty_text_mask] = 0 text_pad_mask = torch.arange(maxlen, device=device).expand(batch_size, maxlen) < tensor_text_length[:, None] if motion_mask is not None: if motion_mask.dtype == torch.bool: motion_mask = 1 * motion_mask batch_size = text_feat.shape[0] # sample loop indices = list(range(num_denoising_steps))[::-1] shape = (batch_size, max_frames, self.motion_rep.motion_rep_dim) cur_mot = torch.randn(shape, device=self.device) num_denoising_steps = torch.tensor( [num_denoising_steps], device=self.device ) # this and t need to be tensor for onnx export # init diffusion with correct num steps before looping use_timesteps = self.diffusion.space_timesteps(num_denoising_steps[0])[0] self.diffusion.calc_diffusion_vars(use_timesteps) for i in progress_bar(indices): t = torch.tensor([i] * cur_mot.size(0), device=self.device) with torch.inference_mode(): cur_mot = self.denoising_step( cur_mot, pad_mask, text_feat, text_pad_mask, t, first_heading_angle, motion_mask, observed_motion, num_denoising_steps, cfg_weight, guide_masks=guide_masks, cfg_type=cfg_type, ) return cur_mot ================================================ FILE: kimodo/model/llm2vec/README.md ================================================ This is a patched version of the original [LLM2Vec](https://github.com/McGill-NLP/llm2vec) codebase so that `McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised` works with `transformers==5.0.0rc3`. ================================================ FILE: kimodo/model/llm2vec/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """LLM2Vec text encoder and wrapper for Kimodo.""" from .llm2vec import LLM2Vec from .llm2vec_wrapper import LLM2VecEncoder __all__ = [ "LLM2Vec", "LLM2VecEncoder", ] ================================================ FILE: kimodo/model/llm2vec/llm2vec.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2024 McGill NLP # SPDX-License-Identifier: MIT # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import json import logging import os from functools import partial from typing import Dict, List, Optional, Union import numpy as np import torch import torch.multiprocessing as mp from peft import PeftModel from torch import Tensor, device, nn from tqdm.autonotebook import tqdm, trange from transformers import ( AutoConfig, AutoModel, AutoTokenizer, GemmaConfig, LlamaConfig, MistralConfig, PretrainedConfig, Qwen2Config, ) logger = logging.getLogger(__name__) def batch_to_device(batch, target_device: device): """Send a pytorch batch to a device (CPU/GPU)""" for key in batch: if isinstance(batch[key], Tensor): batch[key] = batch[key].to(target_device) return batch class LLM2Vec(nn.Module): def __init__( self, model: AutoModel, tokenizer: AutoTokenizer, pooling_mode: str = "mean", max_length: int = 512, doc_max_length: int = 400, skip_instruction: bool = True, ): super().__init__() self.model = model self.tokenizer = tokenizer self.pooling_mode = pooling_mode self.skip_instruction = skip_instruction self.max_length = max_length self.doc_max_length = doc_max_length self.config = model.config @classmethod def _get_model_class(cls, config_class_name, enable_bidirectional): if not enable_bidirectional: return AutoModel if config_class_name == "MistralConfig": from .models.bidirectional_mistral import MistralBiModel return MistralBiModel elif config_class_name == "LlamaConfig": from .models.bidirectional_llama import LlamaBiModel return LlamaBiModel elif config_class_name == "GemmaConfig": from .models.bidirectional_gemma import GemmaBiModel return GemmaBiModel elif config_class_name == "Qwen2Config": from .models.bidirectional_qwen2 import Qwen2BiModel return Qwen2BiModel else: raise ValueError(f"{config_class_name} is not supported yet with bidirectional models.") @classmethod def from_pretrained( cls, base_model_name_or_path, peft_model_name_or_path=None, merge_peft=False, enable_bidirectional=True, **kwargs, ): # pop out encoder args keys = ["pooling_mode", "max_length", "doc_max_length", "skip_instruction"] encoder_args = {key: kwargs.pop(key, None) for key in keys if kwargs.get(key) is not None} tokenizer = AutoTokenizer.from_pretrained(base_model_name_or_path) tokenizer.pad_token = tokenizer.eos_token tokenizer.padding_side = "left" config = AutoConfig.from_pretrained(base_model_name_or_path) config_class_name = config.__class__.__name__ model_class = cls._get_model_class(config_class_name, enable_bidirectional=enable_bidirectional) model = model_class.from_pretrained(base_model_name_or_path, **kwargs) if os.path.isdir(base_model_name_or_path) and 
os.path.exists(f"{base_model_name_or_path}/config.json"): with open(f"{base_model_name_or_path}/config.json", "r") as fIn: config_dict = json.load(fIn) config = PretrainedConfig.from_dict(config_dict) model.config._name_or_path = config._name_or_path # For special case where config.json and adapter weights are in the same directory if hasattr(model, "peft_config"): model = PeftModel.from_pretrained( model, base_model_name_or_path, ) model = model.merge_and_unload() if peft_model_name_or_path is not None: model = PeftModel.from_pretrained( model, peft_model_name_or_path, ) if merge_peft: model = model.merge_and_unload() config = {} config_addr = peft_model_name_or_path if peft_model_name_or_path is not None else base_model_name_or_path if os.path.exists(f"{config_addr}/llm2vec_config.json"): with open(f"{config_addr}/llm2vec_config.json", "r") as fIn: llm2vec_config = json.load(fIn) config.update(llm2vec_config) for key, value in encoder_args.items(): config[key] = value return cls(model=model, tokenizer=tokenizer, **config) def prepare_for_tokenization(self, text): if self.model.config._name_or_path == "meta-llama/Meta-Llama-3-8B-Instruct": text = "<|start_header_id|>user<|end_header_id|>\n\n" + text.strip() + "<|eot_id|>" return text if self.model.config._name_or_path in [ "mistralai/Mistral-7B-Instruct-v0.2", "meta-llama/Llama-2-7b-chat-hf", ]: text = "[INST] " + text.strip() + " [/INST]" if self.model.config._name_or_path in [ "google/gemma-2-9b-it", ]: text = "user\n" + text.strip() + "" if self.model.config._name_or_path in [ "Qwen/Qwen2-1.5B-Instruct", "Qwen/Qwen2-7B-Instruct", ]: text = "<|im_start|>user\n" + text.strip() + "<|im_end|>" if self.pooling_mode == "eos_token": if self.model.config._name_or_path == "meta-llama/Meta-Llama-3-8B": text = text.strip() + "<|end_of_text|>" elif isinstance(self.model.config, LlamaConfig) or isinstance(self.model.config, MistralConfig): text = text.strip() + " " elif isinstance(self.model.config, GemmaConfig): text = text.strip() + "" elif isinstance(self.model.config, Qwen2Config): text = text.strip() + "<|endoftext|>" return text def tokenize(self, texts): texts_2 = [] original_texts = [] for text in texts: t = text.split("!@#$%^&*()") texts_2.append(t[1] if len(t) > 1 else "") original_texts.append("".join(t)) original = self.tokenizer( original_texts, return_tensors="pt", padding=True, truncation=True, max_length=self.max_length, ) embed_mask = None for t_i, t in enumerate(texts_2): ids = self.tokenizer( [t], return_tensors="pt", padding=True, truncation=True, max_length=self.max_length, add_special_tokens=False, ) if embed_mask is None: e_m = torch.zeros_like(original["attention_mask"][t_i]) if len(ids["input_ids"][0]) > 0: e_m[-len(ids["input_ids"][0]) :] = torch.ones(len(ids["input_ids"][0])) embed_mask = e_m.unsqueeze(0) else: e_m = torch.zeros_like(original["attention_mask"][t_i]) if len(ids["input_ids"][0]) > 0: e_m[-len(ids["input_ids"][0]) :] = torch.ones(len(ids["input_ids"][0])) embed_mask = torch.cat((embed_mask, e_m.unsqueeze(0)), dim=0) original["embed_mask"] = embed_mask return original def _skip_instruction(self, sentence_feature): assert sentence_feature["attention_mask"].shape == sentence_feature["embed_mask"].shape sentence_feature["attention_mask"] = sentence_feature["embed_mask"] def forward(self, sentence_feature: Dict[str, Tensor]): embed_mask = None if "embed_mask" in sentence_feature: embed_mask = sentence_feature.pop("embed_mask") reps = self.model(**sentence_feature) sentence_feature["embed_mask"] = embed_mask 
return self.get_pooling(sentence_feature, reps.last_hidden_state) def get_pooling(self, features, last_hidden_states): # All models padded from left assert self.tokenizer.padding_side == "left", "Pooling modes are implemented for padding from left." if self.skip_instruction: self._skip_instruction(features) seq_lengths = features["attention_mask"].sum(dim=-1) if self.pooling_mode == "mean": return torch.stack( [last_hidden_states[i, -length:, :].mean(dim=0) for i, length in enumerate(seq_lengths)], dim=0, ) elif self.pooling_mode == "weighted_mean": bs, l, _ = last_hidden_states.shape complete_weights = torch.zeros(bs, l, device=last_hidden_states.device) for i, seq_l in enumerate(seq_lengths): if seq_l > 0: complete_weights[i, -seq_l:] = torch.arange(seq_l) + 1 complete_weights[i] /= torch.clamp(complete_weights[i].sum(), min=1e-9) return torch.sum(last_hidden_states * complete_weights.unsqueeze(-1), dim=1) elif self.pooling_mode == "eos_token" or self.pooling_mode == "last_token": return last_hidden_states[:, -1] elif self.pooling_mode == "bos_token": return last_hidden_states[features["input_ids"] == self.tokenizer.bos_token_id] else: raise ValueError(f"{self.pooling_mode} is not implemented yet.") def _convert_to_str(self, instruction, text): tokenized_q = self.tokenizer( text, return_tensors="pt", padding=True, truncation=True, max_length=self.max_length, add_special_tokens=False, ) tokenized_q_length = len(tokenized_q["input_ids"][0]) while tokenized_q_length > self.doc_max_length: reduction_ratio = self.doc_max_length / tokenized_q_length reduced_length = int(len(text.split()) * reduction_ratio) text = " ".join(text.split()[:reduced_length]) tokenized_q = self.tokenizer( text, return_tensors="pt", padding=True, truncation=True, max_length=self.max_length, add_special_tokens=False, ) tokenized_q_length = len(tokenized_q["input_ids"][0]) return f"{instruction.strip()} !@#$%^&*(){text}" if instruction else f"!@#$%^&*(){text}" def encode( self, sentences: Union[str, List[str]], batch_size: int = 32, show_progress_bar: bool = True, convert_to_numpy: bool = False, convert_to_tensor: bool = False, device: Optional[str] = None, ): """ Encode a list of sentences to their respective embeddings. The sentences can be a list of strings or a string. Args: sentences: sentence or sentences to encode. batch_size: batch size for turning sentence tokens into embeddings. show_progress_bar: whether to show progress bars during encoding steps. convert_to_numpy: If true, return numpy arrays instead of torch tensors. convert_to_tensor: If true, return torch tensors (default). device: torch backend device identifier (e.g., 'cuda', 'cpu','mps' etc.). If not specified, the default is to use cuda when available, otherwise cpu. Note that only the choice of 'cuda' supports multiprocessing as currently implemented. Returns: embeddings of the sentences. Embeddings are detached and always on the CPU (see _encode implementation). 
""" if isinstance(sentences[0], str) and isinstance(sentences[-1], int): sentences = [sentences] # required for MEDI version of MTEB if isinstance(sentences[0], str): sentences = [[""] + [sentence] for sentence in sentences] if device is None: device = "cuda" if torch.cuda.is_available() else "cpu" concatenated_input_texts = [] for sentence in sentences: assert isinstance(sentence[0], str) assert isinstance(sentence[1], str) concatenated_input_texts.append(self._convert_to_str(sentence[0], sentence[1])) sentences = concatenated_input_texts self.eval() if convert_to_tensor: convert_to_numpy = False length_sorted_idx = np.argsort([-self._text_length(sen) for sen in sentences]) sentences_sorted = [sentences[idx] for idx in length_sorted_idx] all_embeddings = [] if torch.cuda.device_count() <= 1: # This branch also support mps devices self.to(device) for start_index in trange( 0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar, ): sentences_batch = sentences_sorted[start_index : start_index + batch_size] embeddings = self._encode(sentences_batch, device=device, convert_to_numpy=convert_to_numpy) all_embeddings.append(embeddings) else: num_proc = torch.cuda.device_count() cuda_compatible_multiprocess = mp.get_context("spawn") with cuda_compatible_multiprocess.Pool(num_proc) as p: sentences_batches = [ sentences_sorted[start_index : start_index + batch_size] for start_index in range(0, len(sentences), batch_size) ] progress_bar = tqdm( total=len(sentences_batches), desc="Batches", disable=not show_progress_bar, ) results = [] def update(*args): progress_bar.update() for batch in sentences_batches: results.append( p.apply_async( self._encode, args=(batch, None, convert_to_numpy, True), callback=update, ) ) all_embeddings = [result.get() for result in results] progress_bar.close() all_embeddings = torch.cat(all_embeddings, dim=0) all_embeddings = all_embeddings[np.argsort(length_sorted_idx)] all_embeddings = all_embeddings.to(torch.float32) if convert_to_numpy: all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings]) return all_embeddings def save(self, output_path, merge_before_save=False, save_config=True): if merge_before_save and isinstance(self.model, PeftModel): self.model = self.model.merge_and_unload() # Fixes the issue of saving - https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse/discussions/1 if hasattr(self.model, "_hf_peft_config_loaded"): self.model._hf_peft_config_loaded = False self.model.save_pretrained(output_path) self.tokenizer.save_pretrained(output_path) llm2vec_config = { "pooling_mode": self.pooling_mode, "max_length": self.max_length, "doc_max_length": self.doc_max_length, "skip_instruction": self.skip_instruction, } if save_config: os.makedirs(output_path, exist_ok=True) with open(f"{output_path}/llm2vec_config.json", "w") as fOut: json.dump(llm2vec_config, fOut, indent=4) def _encode( self, sentences_batch, device: Optional[str] = None, convert_to_numpy: bool = False, multiprocessing=False, ): if multiprocessing: # multiprocessing only supports CUDA devices at this time, so we ignore the value of device # and use cuda:rank for the device rank = mp.current_process()._identity[0] if device is None and torch.cuda.is_available(): device = f"cuda:{rank % torch.cuda.device_count()}" self.to(device) features = self.tokenize([self.prepare_for_tokenization(sentence) for sentence in sentences_batch]) features = batch_to_device(features, device) with torch.no_grad(): embeddings = self.forward(features) embeddings 
= embeddings.detach() embeddings = embeddings.cpu() return embeddings def _text_length(self, text: Union[List[int], List[List[int]]]): """Helper function to get the length for the input text. Text can be either a string (which means a single text), a list of ints (which means a single tokenized text), or a tuple of list of ints (representing several text inputs to the model). """ if ( isinstance(text, str) or (isinstance(text, list) and isinstance(text[0], int)) or len(text) == 0 ): # Single text, list of ints, or empty return len(text) if isinstance(text, dict): # {key: value} case return len(next(iter(text.values()))) elif not hasattr(text, "__len__"): # Object has no len() method return 1 else: return sum([len(t) for t in text]) def resize_token_embeddings( self, new_num_tokens: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, ) -> nn.Embedding: return self.model.resize_token_embeddings(new_num_tokens=new_num_tokens, pad_to_multiple_of=pad_to_multiple_of) def gradient_checkpointing_enable(self, gradient_checkpointing_kwargs=None): self.model.gradient_checkpointing_enable(gradient_checkpointing_kwargs=gradient_checkpointing_kwargs) ================================================ FILE: kimodo/model/llm2vec/llm2vec_wrapper.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """LLM2Vec encoder wrapper for Kimodo text conditioning.""" import os import numpy as np import torch from .llm2vec import LLM2Vec class LLM2VecEncoder: """LLM2Vec text embeddings.""" def __init__( self, base_model_name_or_path: str, peft_model_name_or_path: str, dtype: str, llm_dim: int, device: str = "auto", ) -> None: torch_dtype = getattr(torch, dtype) self.llm_dim = llm_dim cache_dir = os.environ.get("HUGGINGFACE_CACHE_DIR") if "TEXT_ENCODERS_DIR" in os.environ: base_model_name_or_path = os.path.join(os.environ["TEXT_ENCODERS_DIR"], base_model_name_or_path) peft_model_name_or_path = os.path.join(os.environ["TEXT_ENCODERS_DIR"], peft_model_name_or_path) self.model = LLM2Vec.from_pretrained( base_model_name_or_path=base_model_name_or_path, peft_model_name_or_path=peft_model_name_or_path, torch_dtype=torch_dtype, cache_dir=cache_dir, ) env_device = os.environ.get("TEXT_ENCODER_DEVICE") if env_device: device = env_device if device == "auto": device = "cuda" if torch.cuda.is_available() else "cpu" self._device = device if device is not None: self.model = self.model.to(device) self.model.eval() for p in self.model.parameters(): p.requires_grad = False def to(self, device: torch.device): self.model = self.model.to(device) self._device = str(device) if not isinstance(device, str) else device return self def eval(self): self.model.eval() return self def get_device(self): return self.model.model.device def __call__(self, text: list[str] | str): is_string = False if isinstance(text, str): text = [text] is_string = True with torch.no_grad(): encoded_text = self.model.encode( text, # IMPORTANT: different batch sizes unexpectedly change the output embeddings, so we always set it to 1 # here for repeatability no matter how many texts are being encoded. This # is a fundamental issue with transformers, and is especially bad at lower # precisions (https://github.com/huggingface/transformers/issues/25420#issuecomment-1775317535) # note: this is an internal batch size used by llm2vec - the text list can still be of arbitrary length.
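# (added note: with batch_size=1 every prompt is encoded independently, so the
# result is identical whether texts are submitted one at a time or as a list;
# raising this value would trade that repeatability for throughput.)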
batch_size=1, show_progress_bar=False, device=self._device, ) assert len(encoded_text.shape) assert self.llm_dim == encoded_text.shape[-1] encoded_text = encoded_text[:, None] lengths = np.ones(len(encoded_text), dtype=int).tolist() if is_string: encoded_text = encoded_text[0] lengths = lengths[0] encoded_text = torch.tensor(encoded_text).to(self._device) return encoded_text, lengths ================================================ FILE: kimodo/model/llm2vec/models/__init__.py ================================================ # from .bidirectional_gemma import GemmaBiForMNTP, GemmaBiModel # from .bidirectional_llama import LlamaBiForMNTP, LlamaBiModel # from .bidirectional_mistral import MistralBiForMNTP, MistralBiModel # from .bidirectional_qwen2 import Qwen2BiForMNTP, Qwen2BiModel ================================================ FILE: kimodo/model/llm2vec/models/attn_mask_utils.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2024 McGill NLP # SPDX-License-Identifier: MIT # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. from typing import List, Optional, Tuple, Union import torch from transformers.modeling_attn_mask_utils import AttentionMaskConverter def _prepare_4d_causal_attention_mask( attention_mask: Optional[torch.Tensor], input_shape: Union[torch.Size, Tuple, List], inputs_embeds: torch.Tensor, past_key_values_length: int, sliding_window: Optional[int] = None, ): """Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape `(batch_size, key_value_length)` Args: attention_mask (`torch.Tensor` or `None`): A 2D attention mask of shape `(batch_size, key_value_length)` input_shape (`tuple(int)` or `list(int)` or `torch.Size`): The input shape should be a tuple that defines `(batch_size, query_length)`. inputs_embeds (`torch.Tensor`): The embedded inputs as a torch Tensor. past_key_values_length (`int`): The length of the key value cache. sliding_window (`int`, *optional*): If the model uses windowed attention, a sliding window should be passed. 
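Returns (added summary, inferred from the code below): a 4D additive mask of shape `(batch_size, 1, query_length, key_value_length)` in the dtype of `inputs_embeds`, holding 0 at attended positions and the dtype minimum at masked ones; since the converter is built with `is_causal=False` here, no causal triangle is applied.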
""" attn_mask_converter = AttentionMaskConverter( is_causal=False, sliding_window=sliding_window ) # is_causal=True in original implementation key_value_length = input_shape[-1] + past_key_values_length # 4d mask is passed through the layers if attention_mask is not None and len(attention_mask.shape) == 2: attention_mask = attn_mask_converter.to_4d( attention_mask, input_shape[-1], key_value_length=key_value_length, dtype=inputs_embeds.dtype, ) elif attention_mask is not None and len(attention_mask.shape) == 4: expected_shape = (input_shape[0], 1, input_shape[1], key_value_length) if tuple(attention_mask.shape) != expected_shape: raise ValueError( f"Incorrect 4D attention_mask shape: {tuple(attention_mask.shape)}; expected: {expected_shape}." ) else: # if the 4D mask has correct shape - invert it and fill with negative infinity inverted_mask = 1.0 - attention_mask attention_mask = inverted_mask.masked_fill( inverted_mask.to(torch.bool), torch.finfo(inputs_embeds.dtype).min ) else: attention_mask = attn_mask_converter.to_causal_4d( input_shape[0], input_shape[-1], key_value_length, dtype=inputs_embeds.dtype, device=inputs_embeds.device, ) return attention_mask # Adapted from _prepare_4d_causal_attention_mask def _prepare_4d_causal_attention_mask_for_sdpa( attention_mask: Optional[torch.Tensor], input_shape: Union[torch.Size, Tuple, List], inputs_embeds: torch.Tensor, past_key_values_length: int, sliding_window: Optional[int] = None, ): """Prepares the correct `attn_mask` argument to be used by `torch.nn.functional.scaled_dot_product_attention`. In case no token is masked in the `attention_mask` argument, we simply set it to `None` for the cases `query_length == 1` and `key_value_length == query_length`, and rely instead on SDPA `is_causal` argument to use causal/non-causal masks, allowing to dispatch to the flash attention kernel (that can otherwise not be used if a custom `attn_mask` is passed). """ attn_mask_converter = AttentionMaskConverter( is_causal=False, sliding_window=sliding_window ) # is_causal=True in original implementation key_value_length = input_shape[-1] + past_key_values_length batch_size, query_length = input_shape # torch.jit.trace, symbolic_trace and torchdynamo with fullgraph=True are unable to capture the controlflow `is_causal=attention_mask is None and q_len > 1` # used as an SDPA argument. We keep compatibility with these tracing tools by always using SDPA's `attn_mask` argument in case we are tracing. # TODO: For dynamo, rather use a check on fullgraph=True once this is possible (https://github.com/pytorch/pytorch/pull/120400). is_tracing = ( torch.jit.is_tracing() or isinstance(inputs_embeds, torch.fx.Proxy) or (hasattr(torch, "_dynamo") and torch._dynamo.is_compiling()) ) if attention_mask is not None: # 4d mask is passed through if len(attention_mask.shape) == 4: expected_shape = (input_shape[0], 1, input_shape[1], key_value_length) if tuple(attention_mask.shape) != expected_shape: raise ValueError( f"Incorrect 4D attention_mask shape: {tuple(attention_mask.shape)}; expected: {expected_shape}." ) else: # if the 4D mask has correct shape - invert it and fill with negative infinity inverted_mask = 1.0 - attention_mask.to(inputs_embeds.dtype) attention_mask = inverted_mask.masked_fill( inverted_mask.to(torch.bool), torch.finfo(inputs_embeds.dtype).min ) return attention_mask elif not is_tracing and torch.all(attention_mask == 1): if query_length == 1: # For query_length == 1, causal attention and bi-directional attention are the same. 
attention_mask = None elif key_value_length == query_length: attention_mask = None else: # Unfortunately, for query_length > 1 and key_value_length != query_length, we cannot generally ignore the attention mask, as SDPA causal mask generation # may be wrong. We will set `is_causal=False` in SDPA and rely on Transformers attention_mask instead, hence not setting it to None here. # Reference: https://github.com/pytorch/pytorch/issues/108108 pass elif query_length > 1 and key_value_length != query_length: # See the comment above (https://github.com/pytorch/pytorch/issues/108108). # Ugly: we set it to True here to dispatch in the following controlflow to `to_causal_4d`. attention_mask = True elif is_tracing: raise ValueError( 'Attention using SDPA can not be traced with torch.jit.trace when no attention_mask is provided. To solve this issue, please either load your model with the argument `attn_implementation="eager"` or pass an attention_mask input when tracing the model.' ) if attention_mask is None: expanded_4d_mask = None elif attention_mask is True: expanded_4d_mask = attn_mask_converter.to_causal_4d( input_shape[0], input_shape[-1], key_value_length, dtype=inputs_embeds.dtype, device=inputs_embeds.device, ) else: expanded_4d_mask = attn_mask_converter.to_4d( attention_mask, input_shape[-1], dtype=inputs_embeds.dtype, key_value_length=key_value_length, ) # Attend to all tokens in masked rows from the causal_mask, for example the relevant first rows when # using left padding. This is required by F.scaled_dot_product_attention memory-efficient attention path. # Details: https://github.com/pytorch/pytorch/issues/110213 if not is_tracing and expanded_4d_mask.device.type == "cuda": expanded_4d_mask = AttentionMaskConverter._unmask_unattended( expanded_4d_mask, min_dtype=torch.finfo(inputs_embeds.dtype).min ) return expanded_4d_mask ================================================ FILE: kimodo/model/llm2vec/models/bidirectional_llama.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2024 McGill NLP # SPDX-License-Identifier: MIT # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import torch from peft import PeftModel from torch import nn from transformers import LlamaConfig, LlamaForCausalLM, LlamaModel, LlamaPreTrainedModel from transformers.cache_utils import Cache, StaticCache from transformers.modeling_attn_mask_utils import AttentionMaskConverter from transformers.models.llama.modeling_llama import ( LlamaAttention, LlamaDecoderLayer, # LlamaFlashAttention2, LlamaMLP, LlamaRMSNorm, LlamaRotaryEmbedding, # LlamaSdpaAttention, ) from transformers.utils import logging from .utils import is_transformers_attn_greater_or_equal_4_43_1 logger = logging.get_logger(__name__) class ModifiedLlamaAttention(LlamaAttention): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.is_causal = False # class ModifiedLlamaFlashAttention2(LlamaFlashAttention2): # def __init__(self, *args, **kwargs): # super().__init__(*args, **kwargs) # self.is_causal = False # class ModifiedLlamaSdpaAttention(LlamaSdpaAttention): # def __init__(self, *args, **kwargs): # super().__init__(*args, **kwargs) # self.is_causal = False # LLAMA_ATTENTION_CLASSES = { # "eager": ModifiedLlamaAttention, # "flash_attention_2": ModifiedLlamaFlashAttention2, # "sdpa": ModifiedLlamaSdpaAttention, # } class ModifiedLlamaDecoderLayer(LlamaDecoderLayer): def __init__(self, config: LlamaConfig, layer_idx: int): nn.Module.__init__(self) self.hidden_size = config.hidden_size self.self_attn = ModifiedLlamaAttention(config=config, layer_idx=layer_idx) # self.self_attn = LLAMA_ATTENTION_CLASSES[config._attn_implementation]( # config=config, layer_idx=layer_idx # ) self.mlp = LlamaMLP(config) self.input_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps) self.post_attention_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps) class LlamaBiModel(LlamaModel): _no_split_modules = ["ModifiedLlamaDecoderLayer"] def __init__(self, config: LlamaConfig): if not is_transformers_attn_greater_or_equal_4_43_1(): raise ValueError( "The current implementation of LlamaEncoderModel follows modeling_llama.py of transformers version >= 4.43.1" ) LlamaPreTrainedModel.__init__(self, config) self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx) self.layers = nn.ModuleList( [ModifiedLlamaDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)] ) self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps) self.rotary_emb = LlamaRotaryEmbedding(config=config) self.gradient_checkpointing = False # Initialize weights and apply final processing self.post_init() def _update_causal_mask( self, attention_mask, input_tensor, cache_position, past_key_values: Cache, output_attentions: bool, ): if self.config._attn_implementation == "flash_attention_2": if attention_mask is not None and 0.0 in attention_mask: return attention_mask return None # For SDPA, when possible, we will rely on its `is_causal` argument instead of its `attn_mask` argument, in # order to dispatch on Flash Attention 2. 
This feature is not compatible with static cache, as SDPA will fail # to infer the attention mask. past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 using_static_cache = isinstance(past_key_values, StaticCache) # When output attentions is True, sdpa implementation's forward method calls the eager implementation's forward # if self.config._attn_implementation == "sdpa" and not using_static_cache and not output_attentions: # if AttentionMaskConverter._ignore_causal_mask_sdpa( # attention_mask, # inputs_embeds=input_tensor, # past_key_values_length=past_seen_tokens, # is_training=self.training, # ): # return None dtype, device = input_tensor.dtype, input_tensor.device min_dtype = torch.finfo(dtype).min sequence_length = input_tensor.shape[1] if using_static_cache: target_length = past_key_values.get_max_length() else: target_length = ( attention_mask.shape[-1] if isinstance(attention_mask, torch.Tensor) else past_seen_tokens + sequence_length + 1 ) causal_mask = torch.zeros( (sequence_length, target_length), dtype=dtype, device=device ) # in original implementation - torch.full((sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device) # Commenting out next 2 lines to disable causal masking # if sequence_length != 1: # causal_mask = torch.triu(causal_mask, diagonal=1) causal_mask *= torch.arange(target_length, device=device) > cache_position.reshape(-1, 1) causal_mask = causal_mask[None, None, :, :].expand(input_tensor.shape[0], 1, -1, -1) if attention_mask is not None: causal_mask = causal_mask.clone() # copy to contiguous memory for in-place edit if attention_mask.dim() == 2: mask_length = attention_mask.shape[-1] padding_mask = causal_mask[..., :mask_length].eq(0.0) * attention_mask[:, None, None, :].eq(0.0) causal_mask[..., :mask_length] = causal_mask[..., :mask_length].masked_fill(padding_mask, min_dtype) elif attention_mask.dim() == 4: # backwards compatibility: we allow passing a 4D attention mask shorter than the input length with # cache. In that case, the 4D attention mask attends to the newest tokens only. 
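# (added example: with cache_position starting at 5, sequence_length 3 and a 4D
# mask covering only those 3 new queries, the slice below is written at row
# offset 5 of causal_mask; a mask spanning the full cache keeps offset 0.)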
if attention_mask.shape[-2] < cache_position[0] + sequence_length: offset = cache_position[0] else: offset = 0 mask_shape = attention_mask.shape mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype causal_mask[ : mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3], ] = mask_slice if ( self.config._attn_implementation == "sdpa" and attention_mask is not None and attention_mask.device.type == "cuda" and not output_attentions ): causal_mask = AttentionMaskConverter._unmask_unattended(causal_mask, min_dtype) return causal_mask class LlamaBiForMNTP(LlamaForCausalLM): def __init__(self, config): LlamaPreTrainedModel.__init__(self, config) self.model = LlamaBiModel(config) self.vocab_size = config.vocab_size self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) # Initialize weights and apply final processing self.post_init() # getter for PEFT model def get_model_for_peft(self): return self.model # setter for PEFT model def set_model_for_peft(self, model: PeftModel): self.model = model # save the PEFT model def save_peft_model(self, path): self.model.save_pretrained(path) ================================================ FILE: kimodo/model/llm2vec/models/utils.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2024 McGill NLP # SPDX-License-Identifier: MIT # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. import importlib.metadata from packaging import version from transformers.utils.import_utils import _is_package_available def is_transformers_attn_greater_or_equal_4_43_1(): if not _is_package_available("transformers"): return False return version.parse(importlib.metadata.version("transformers")) >= version.parse("4.43.1") ================================================ FILE: kimodo/model/load_model.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Load Kimodo diffusion models from local checkpoints or Hugging Face.""" from pathlib import Path from typing import Optional from huggingface_hub import snapshot_download from omegaconf import OmegaConf from .loading import ( AVAILABLE_MODELS, DEFAULT_MODEL, DEFAULT_TEXT_ENCODER_URL, MODEL_NAMES, TMR_MODELS, get_env_var, instantiate_from_dict, ) from .registry import get_model_info, resolve_model_name DEFAULT_TEXT_ENCODER = "llm2vec" TEXT_ENCODER_PRESETS = { "llm2vec": { "target": "kimodo.model.LLM2VecEncoder", "kwargs": { "base_model_name_or_path": "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp", "peft_model_name_or_path": "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised", "dtype": "bfloat16", "llm_dim": 4096, "device": "auto", }, } } def _resolve_hf_model_path(modelname: str) -> Path: """Resolve a model name to a local snapshot path via the Hugging Face cache or a download.""" try: repo_id = MODEL_NAMES[modelname] except KeyError: raise ValueError(f"Model '{modelname}' not found. Available models: {list(MODEL_NAMES.keys())}") local_cache = get_env_var("LOCAL_CACHE", "False").lower() == "true" if not local_cache: snapshot_dir = snapshot_download(repo_id=repo_id) # will check online no matter what return Path(snapshot_dir) try: snapshot_dir = snapshot_download(repo_id=repo_id, local_files_only=True) # will check local cache only return Path(snapshot_dir) except Exception: # if local cache is not found, download from online try: snapshot_dir = snapshot_download(repo_id=repo_id) return Path(snapshot_dir) except Exception: raise RuntimeError(f"Could not resolve model '{modelname}' from Hugging Face (repo: {repo_id}).") from None def _build_api_text_encoder_conf(text_encoder_url: str) -> dict: return { "_target_": "kimodo.model.text_encoder_api.TextEncoderAPI", "url": text_encoder_url, } def _build_local_text_encoder_conf(text_encoder_fp32: bool = False) -> dict: text_encoder_name = get_env_var("TEXT_ENCODER", DEFAULT_TEXT_ENCODER) if text_encoder_name not in TEXT_ENCODER_PRESETS: available = ", ".join(sorted(TEXT_ENCODER_PRESETS)) raise ValueError(f"Unknown TEXT_ENCODER='{text_encoder_name}'. Available: {available}") preset = TEXT_ENCODER_PRESETS[text_encoder_name] kwargs = dict(preset["kwargs"]) # copy so a one-off fp32 request does not mutate the module-level preset if text_encoder_fp32: kwargs["dtype"] = "float32" return { "_target_": preset["target"], **kwargs, } def _select_text_encoder_conf(text_encoder_url: str, text_encoder_fp32: bool = False) -> dict: # TEXT_ENCODER_MODE options: # - "api": force TextEncoderAPI # - "local": force local LLM2VecEncoder # - "auto": try API first, fallback to local if unreachable mode = get_env_var("TEXT_ENCODER_MODE", "auto").lower() if mode == "local": return _build_local_text_encoder_conf(text_encoder_fp32) if mode == "api": return _build_api_text_encoder_conf(text_encoder_url) api_conf = _build_api_text_encoder_conf(text_encoder_url) try: text_encoder = instantiate_from_dict(api_conf) # Probe availability early so inference doesn't fail later. text_encoder(["healthcheck"]) return api_conf except Exception as error: print( "Text encoder service is unreachable, falling back to local LLM2Vec " f"encoder. ({type(error).__name__}: {error})" ) return _build_local_text_encoder_conf(text_encoder_fp32) def load_model( modelname=None, device=None, eval_mode: bool = True, default_family: Optional[str] = "Kimodo", return_resolved_name: bool = False, text_encoder=None, text_encoder_fp32: bool = False, ): """Load a kimodo model by name (e.g. 'g1', 'soma'). Resolution of partial/full names (e.g.
Kimodo-SOMA-RP-v1, SOMA) is done inside this function using default_family when the name is not a known short key. Args: modelname: Model identifier; uses DEFAULT_MODEL if None. Can be a short key, a full name (e.g. Kimodo-SOMA-RP-v1), or a partial name; unknown names are resolved via resolve_model_name using default_family. device: Target device for the model (e.g. 'cuda', 'cpu'). eval_mode: If True, set model to eval mode. default_family: Used when modelname is not in AVAILABLE_MODELS to resolve partial names ("Kimodo" for demo/generation, "TMR" for embed script). Default "Kimodo". return_resolved_name: If True, return (model, resolved_short_key). If False, return only the model. text_encoder: Pre-built text encoder to reuse. When provided, skips text encoder selection/instantiation entirely. text_encoder_fp32: If True, uses fp32 for the text encoder rather than the default bfloat16. Returns: Loaded model in eval mode, or (model, resolved short key) if return_resolved_name is True. Raises: ValueError: If modelname is not in AVAILABLE_MODELS and cannot be resolved. FileNotFoundError: If config.yaml is missing in the checkpoint folder. """ if modelname is None: modelname = DEFAULT_MODEL if modelname not in AVAILABLE_MODELS: if default_family is not None: modelname = resolve_model_name(modelname, default_family) else: raise ValueError( f"""The model is not recognized. Please choose between: {AVAILABLE_MODELS}""" ) resolved_modelname = modelname # In case we specify a custom checkpoint directory configured_checkpoint_dir = get_env_var("CHECKPOINT_DIR") if configured_checkpoint_dir: print(f"CHECKPOINT_DIR is set to {configured_checkpoint_dir}, checking the local cache...") # Checkpoint folders are named by display name (e.g. Kimodo-SOMA-RP-v1) info = get_model_info(modelname) checkpoint_folder_name = info.display_name if info is not None else modelname model_path = Path(configured_checkpoint_dir) / checkpoint_folder_name if not model_path.exists() and modelname != checkpoint_folder_name: # Fallback: try short_key for backward compatibility model_path = Path(configured_checkpoint_dir) / modelname if not model_path.exists(): print(f"Model folder not found at '{model_path}', downloading it from Hugging Face...") model_path = _resolve_hf_model_path(modelname) else: # Otherwise, we load the model from the local cache or download it from Hugging Face. model_path = _resolve_hf_model_path(modelname) model_config_path = model_path / "config.yaml" if not model_config_path.exists(): raise FileNotFoundError(f"The model checkpoint folder exists but config.yaml is missing: {model_config_path}") model_conf = OmegaConf.load(model_config_path) if modelname in TMR_MODELS: # Same process at the moment for TMR and Kimodo pass if text_encoder is not None: runtime_conf = OmegaConf.create({"checkpoint_dir": str(model_path)}) else: text_encoder_url = get_env_var("TEXT_ENCODER_URL", DEFAULT_TEXT_ENCODER_URL) runtime_conf = OmegaConf.create( { "checkpoint_dir": str(model_path), "text_encoder": _select_text_encoder_conf(text_encoder_url, text_encoder_fp32), } ) model_cfg = OmegaConf.to_container(OmegaConf.merge(model_conf, runtime_conf), resolve=True) model_cfg.pop("checkpoint_dir", None) if text_encoder is not None: # Prevent Hydra from instantiating a new text encoder; pass None so # Kimodo.__init__ receives a placeholder we replace immediately after.
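# Usage sketch (added, illustrative only): a prebuilt encoder can be shared
# across several models so the LLM is loaded just once:
#     enc = instantiate_from_dict(_build_local_text_encoder_conf())
#     model_a = load_model("g1", device="cuda", text_encoder=enc)
#     model_b = load_model("soma", device="cuda", text_encoder=enc)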
model_cfg["text_encoder"] = None model = instantiate_from_dict(model_cfg, overrides={"device": device}) if text_encoder is not None: model.text_encoder = text_encoder if eval_mode: model = model.eval() if return_resolved_name: return model, resolved_modelname return model ================================================ FILE: kimodo/model/loading.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Model loading utilities: checkpoints, registry, env, and Hydra-based instantiation.""" import os from pathlib import Path from typing import Any, Dict, Optional, Union import torch from hydra.utils import instantiate from omegaconf import OmegaConf from safetensors.torch import load_file as load_safetensors from .registry import ( AVAILABLE_MODELS, DEFAULT_MODEL, DEFAULT_TEXT_ENCODER_URL, KIMODO_MODELS, MODEL_NAMES, TMR_MODELS, ) def get_env_var(name: str, default: Optional[str] = None) -> Optional[str]: """Return environment variable value, or default if unset/empty.""" return os.environ.get(name) or default def instantiate_from_dict( cfg: Dict[str, Any], overrides: Optional[Dict[str, Any]] = None, ): """Instantiate an object from a config dict (e.g. from OmegaConf.to_container). The dict must contain _target_ with a fully qualified class path. Nested configs are instantiated recursively. """ if overrides: cfg = {**cfg, **overrides} conf = OmegaConf.create(cfg) return instantiate(conf) def load_checkpoint_state_dict(ckpt_path: Union[str, Path]) -> dict: """Load a state dict from a checkpoint file. If the checkpoint is a dict with a 'state_dict' key (e.g. PyTorch Lightning), that is returned; otherwise the whole checkpoint is treated as the state dict. Args: ckpt_path: Path to the checkpoint file. Returns: state_dict suitable for model.load_state_dict(). """ ckpt_path = str(ckpt_path) if ckpt_path.endswith(".safetensors"): state_dict = load_safetensors(ckpt_path) else: checkpoint = torch.load(ckpt_path, map_location="cpu", weights_only=False) if isinstance(checkpoint, dict) and "state_dict" in checkpoint: state_dict = checkpoint["state_dict"] elif isinstance(checkpoint, dict): state_dict = checkpoint else: raise ValueError(f"Unsupported checkpoint format: {ckpt_path}") return {key: val.detach().cpu() for key, val in state_dict.items()} __all__ = [ "get_env_var", "instantiate_from_dict", "KIMODO_MODELS", "TMR_MODELS", "AVAILABLE_MODELS", "MODEL_NAMES", "DEFAULT_MODEL", "DEFAULT_TEXT_ENCODER_URL", "load_checkpoint_state_dict", ] ================================================ FILE: kimodo/model/registry.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Registry of model names and Hugging Face repo IDs for Kimodo and TMR. Canonical source of truth is the list of repo IDs. Short keys (e.g. soma-rp) and metadata (dataset, skeleton, version, display name) are derived by parsing. """ import re from dataclasses import dataclass from typing import Optional # Canonical list: repo IDs in the same syntax as Hugging Face (org/Model-Name-v1). # Parser expects: org/Family-SKELETON-DATASET-version (e.g. Kimodo-SOMA-RP-v1). 
KIMODO_REPO_IDS = [ "nvidia/Kimodo-SOMA-RP-v1", "nvidia/Kimodo-SOMA-RP-v1.1", "nvidia/Kimodo-SMPLX-RP-v1", "nvidia/Kimodo-G1-RP-v1", "nvidia/Kimodo-SOMA-SEED-v1", "nvidia/Kimodo-SOMA-SEED-v1.1", "nvidia/Kimodo-G1-SEED-v1", ] TMR_REPO_IDS = [ "nvidia/TMR-SOMA-RP-v1", ] # Repo ID without org, for display (e.g. Kimodo-SOMA-RP-v1). _REPO_NAME_PATTERN = re.compile(r"^(Kimodo|TMR)-([A-Za-z0-9]+)-(RP|SEED)-v(\d+(?:\.\d+)*)$") @dataclass class ModelInfo: """Structured metadata for one model, derived from its repo ID.""" repo_id: str short_key: str family: str skeleton: str dataset: str version: str display_name: str @property def dataset_ui_label(self) -> str: return "Rigplay" if self.dataset == "RP" else "SEED" def _parse_repo_id(repo_id: str) -> Optional[ModelInfo]: """Parse a repo ID into ModelInfo. Returns None if format is unrecognized. """ # repo_id is "org/Model-Name-v1" if "/" in repo_id: _, name = repo_id.split("/", 1) else: name = repo_id m = _REPO_NAME_PATTERN.match(name) if not m: return None family, skeleton, dataset, ver = m.groups() # Normalize skeleton for display (as is for now) skeleton_display = skeleton # Include family so Kimodo-SOMA-RP and TMR-SOMA-RP have distinct keys. short_key = f"{family.lower()}-{skeleton.lower()}-{dataset.lower()}" return ModelInfo( repo_id=repo_id, short_key=short_key, family=family, skeleton=skeleton_display, dataset=dataset, version=f"v{ver}", display_name=name, ) def _version_tuple(v: str) -> tuple[int, ...]: """Parse 'vN' or 'vN.M' into a comparable tuple of ints.""" if v.startswith("v"): parts = v[1:].split(".") if all(p.isdigit() for p in parts): return tuple(int(p) for p in parts) return (0,) def _version_key(info: ModelInfo) -> tuple[int, ...]: return _version_tuple(info.version) def _build_registry() -> tuple[list[ModelInfo], dict[str, str], list[str]]: """Build model infos, short_key -> repo_id map, and list of short keys. When multiple versions exist for the same (family, skeleton, dataset), each ModelInfo gets a version-specific short_key (e.g. kimodo-soma-rp-v1, kimodo-soma-rp-v2) and a versionless alias (kimodo-soma-rp) is added to MODEL_NAMES pointing to the latest version. When only one version exists, the short_key stays versionless (e.g. kimodo-smplx-rp). """ all_repos = KIMODO_REPO_IDS + TMR_REPO_IDS infos: list[ModelInfo] = [] for repo_id in all_repos: info = _parse_repo_id(repo_id) if info is None: raise ValueError(f"Registry repo ID does not match expected pattern: {repo_id}") infos.append(info) # Group by base short_key to detect multi-version families. base_groups: dict[str, list[ModelInfo]] = {} for info in infos: base_groups.setdefault(info.short_key, []).append(info) # For groups with multiple versions, make each short_key version-specific. for base_key, group in base_groups.items(): if len(group) > 1: for info in group: info.short_key = f"{base_key}-{info.version}" # Map each (now unique) short_key to its repo_id. model_names: dict[str, str] = {} for info in infos: model_names[info.short_key] = info.repo_id # Add versionless aliases for multi-version groups, pointing to the latest. for base_key, group in base_groups.items(): if len(group) > 1: latest = max(group, key=_version_key) model_names[base_key] = latest.repo_id return infos, model_names, list(model_names.keys()) MODEL_INFOS, MODEL_NAMES, _SHORT_KEYS = _build_registry() AVAILABLE_MODELS = _SHORT_KEYS # Short-key lists for Kimodo vs TMR (load_model uses TMR_MODELS to branch). 
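# With the registry above this yields, e.g., KIMODO_MODELS containing
# "kimodo-soma-rp-v1", "kimodo-soma-rp-v1.1", "kimodo-smplx-rp", "kimodo-g1-rp",
# "kimodo-soma-seed-v1", "kimodo-soma-seed-v1.1" and "kimodo-g1-seed", while
# TMR_MODELS is just ["tmr-soma-rp"]; versionless aliases live only in MODEL_NAMES.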
KIMODO_MODELS = [info.short_key for info in MODEL_INFOS if info.family == "Kimodo"] TMR_MODELS = [info.short_key for info in MODEL_INFOS if info.family == "TMR"] # Backward compatibility: FRIENDLY_NAMES for any code that still expects it. # Includes versioned short_keys and versionless aliases (latest display name). FRIENDLY_NAMES = {info.short_key: info.display_name for info in MODEL_INFOS} for _key, _repo_id in MODEL_NAMES.items(): if _key not in FRIENDLY_NAMES: for _info in MODEL_INFOS: if _info.repo_id == _repo_id: FRIENDLY_NAMES[_key] = _info.display_name break DEFAULT_MODEL = "kimodo-soma-rp" DEFAULT_TEXT_ENCODER_URL = "http://127.0.0.1:9550/" # Friendly names for skeleton dropdown (key -> label). SKELETON_DISPLAY_NAMES = { "SOMA": "SOMA Human Body", "SMPLX": "SMPLX Human Body", "G1": "Unitree G1 Humanoid Robot", } # Order for skeleton dropdown: SOMA, SMPLX, G1. SKELETON_ORDER = ("SOMA", "SMPLX", "G1") def get_skeleton_display_name(skeleton_key: str) -> str: """Return the UI label for a skeleton key (e.g. SOMA -> SOMA Human Body).""" return SKELETON_DISPLAY_NAMES.get(skeleton_key, skeleton_key) def get_skeleton_key_from_display_name(display_name: str) -> Optional[str]: """Return the skeleton key for a UI label, or None.""" for key, label in SKELETON_DISPLAY_NAMES.items(): if label == display_name: return key return None def get_skeleton_display_names_for_dataset(dataset_ui_label: str, family: Optional[str] = None) -> list[str]: """Return skeleton UI labels for the given dataset. If family is set (e.g. "Kimodo"), only skeletons with a model of that family are included. """ keys = get_skeletons_for_dataset(dataset_ui_label, family=family) return [get_skeleton_display_name(k) for k in keys] def get_short_key(repo_id: str) -> Optional[str]: """Return the short key for a repo ID, or None if not in registry.""" for info in MODEL_INFOS: if info.repo_id == repo_id: return info.short_key return None def get_model_info(short_key: str) -> Optional[ModelInfo]: """Return ModelInfo for a short key, or None if not found. When multiple versions share the same short_key, returns the one used for loading (the latest version), so CHECKPOINT_DIR and HF use the same version. """ repo_id = MODEL_NAMES.get(short_key) if repo_id is None: return None for info in MODEL_INFOS: if info.repo_id == repo_id: return info return None def get_short_key_from_display_name(display_name: str) -> Optional[str]: """Return short_key for a display name (e.g. Kimodo-SOMA-RP-v1), or None.""" for info in MODEL_INFOS: if info.display_name == display_name: return info.short_key return None def get_models_for_demo() -> list[ModelInfo]: """Return all model infos in registry order (for demo model list).""" return list(MODEL_INFOS) def get_datasets(family: Optional[str] = None) -> list[str]: """Return unique dataset UI labels (Rigplay, SEED) present in registry. If family is set (e.g. "Kimodo"), only datasets that have a model of that family are included. """ infos = MODEL_INFOS if family is not None: infos = [i for i in infos if i.family == family] labels = set() for info in infos: labels.add(info.dataset_ui_label) return sorted(labels) def get_skeletons_for_dataset(dataset_ui_label: str, family: Optional[str] = None) -> list[str]: """Return skeleton names that have a model for the given dataset. Order: SOMA, SMPLX, G1 (only those present for the dataset). If family is set (e.g. "Kimodo"), only skeletons with a model of that family are included. 
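For example, with the current registry, get_skeletons_for_dataset("Rigplay") returns ["SOMA", "SMPLX", "G1"] and get_skeletons_for_dataset("SEED") returns ["SOMA", "G1"].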
""" dataset = "RP" if dataset_ui_label == "Rigplay" else "SEED" infos = MODEL_INFOS if family is not None: infos = [i for i in infos if i.family == family] skeletons = set() for info in infos: if info.dataset == dataset: skeletons.add(info.skeleton) return [s for s in SKELETON_ORDER if s in skeletons] def get_versions_for_dataset_skeleton(dataset_ui_label: str, skeleton: str) -> list[str]: """Return version strings (e.g. v1) for the given dataset/skeleton. Sorted by version number so the last element is the highest (e.g. v1, v2). """ dataset = "RP" if dataset_ui_label == "Rigplay" else "SEED" versions = [] for info in MODEL_INFOS: if info.dataset == dataset and info.skeleton == skeleton: versions.append(info.version) return sorted(set(versions), key=_version_tuple) def get_models_for_dataset_skeleton( dataset_ui_label: str, skeleton: str, family: Optional[str] = None ) -> list[ModelInfo]: """Return model infos for the given dataset/skeleton, sorted by version (max first). Used to build the Version dropdown (options = full display names, one per model). If family is set (e.g. "Kimodo"), only models of that family are returned. """ dataset = "RP" if dataset_ui_label == "Rigplay" else "SEED" infos = [info for info in MODEL_INFOS if info.dataset == dataset and info.skeleton == skeleton] if family is not None: infos = [i for i in infos if i.family == family] return sorted(infos, key=_version_key, reverse=True) def resolve_to_short_key(dataset_ui_label: str, skeleton: str, version: str) -> Optional[str]: """Return the short key for (dataset, skeleton, version), or None.""" for info in MODEL_INFOS: if info.dataset_ui_label == dataset_ui_label and info.skeleton == skeleton and info.version == version: return info.short_key return None # ----------------------------------------------------------------------------- # Flexible model name resolution (partial names, case-insensitive, defaults) # ----------------------------------------------------------------------------- _FAMILY_ALIASES = {"kimodo": "Kimodo", "tmr": "TMR"} _DATASET_ALIASES = {"rp": "RP", "rigplay": "RP", "seed": "SEED"} _SKELETON_ALIASES = { "soma": "SOMA", "smplx": "SMPLX", "g1": "G1", } def _normalize_family(s: str) -> Optional[str]: """Return canonical family (Kimodo/TMR) or None if unknown.""" return _FAMILY_ALIASES.get(s.strip().lower()) def _normalize_dataset(s: str) -> Optional[str]: """Return canonical dataset (RP/SEED) or None if unknown.""" return _DATASET_ALIASES.get(s.strip().lower()) def _normalize_skeleton(s: str) -> Optional[str]: """Return canonical skeleton (SOMA/SMPLX/G1) or None if unknown.""" return _SKELETON_ALIASES.get(s.strip().lower()) def _get_latest_for_family_skeleton_dataset(family: str, skeleton: str, dataset: str) -> Optional[ModelInfo]: """Return the model info with the highest version for (family, skeleton, dataset).""" candidates = [ info for info in MODEL_INFOS if info.family == family and info.skeleton == skeleton and info.dataset == dataset ] if not candidates: return None return max(candidates, key=_version_key) def kimodo_short_key_for_skeleton_dataset(skeleton: str, dataset: str) -> Optional[str]: """Return the latest Kimodo model short_key for ``skeleton`` and ``dataset`` (RP/SEED), or None.""" info = _get_latest_for_family_skeleton_dataset("Kimodo", skeleton, dataset) return info.short_key if info is not None else None def registry_skeleton_for_joint_count(nb_joints: int) -> str: """Map motion joint count to registry skeleton key (SOMA / SMPLX / G1).""" if nb_joints == 34: return "G1" if 
nb_joints == 22: return "SMPLX" if nb_joints in (77, 30): return "SOMA" raise ValueError(f"No Kimodo model registered for motion with J={nb_joints}") # Optional version: Family-Skeleton-Dataset-vN or Family-Skeleton-Dataset _RESOLVE_FULL_PATTERN = re.compile( r"^(Kimodo|TMR|kimodo|tmr)[\-_]" r"([A-Za-z0-9]+)[\-_]" r"(RP|SEED|rp|seed)" r"(?:[\-_]v(\d+(?:\.\d+)*))?$", re.IGNORECASE, ) # Partial: Skeleton-Dataset or Skeleton or Dataset (no family) _RESOLVE_PARTIAL_PATTERN = re.compile( r"^([A-Za-z0-9]+)(?:[\-_](RP|SEED|rp|seed))?(?:[\-_]v(\d+(?:\.\d+)*))?$", re.IGNORECASE, ) def resolve_model_name(name: Optional[str], default_family: Optional[str] = None) -> str: """Resolve a user-facing model name to a short_key. Accepts full names (e.g. Kimodo-SOMA-RP-v1), case-insensitive matching, and partial names with defaults: dataset=RP, skeleton=SOMA, family from default_family (Kimodo for demo/generation, TMR for embed script). Omitted version resolves to the latest for that model. Args: name: User-provided name (can be None or empty). default_family: "Kimodo" or "TMR" when name is empty or omits family. Returns: Short key (e.g. kimodo-soma-rp) for use with load_model / MODEL_NAMES. Raises: ValueError: If name cannot be resolved or default_family is missing when needed. """ if name is not None: name = name.strip() if not name: if default_family is None: raise ValueError('Model name is empty; provide a name or set default_family ("Kimodo" or "TMR").') fam = _normalize_family(default_family) if fam is None: raise ValueError(f"default_family must be 'Kimodo' or 'TMR', got {default_family!r}") info = _get_latest_for_family_skeleton_dataset(fam, "SOMA", "RP") if info is None: raise ValueError(f"No model found for {fam}-SOMA-RP. Available: {list(MODEL_NAMES.keys())}") return info.short_key # Exact short_key if name in MODEL_NAMES: return name # Case-insensitive match against short_key or display_name name_lower = name.lower() matches = [] for info in MODEL_INFOS: if name_lower == info.short_key.lower(): matches.append(info) disp = info.display_name.lower() if name_lower == disp or name_lower == ("nvidia/" + disp): matches.append(info) if len(matches) == 1: return matches[0].short_key if len(matches) > 1: return matches[0].short_key # Parsed full form: Family-Skeleton-Dataset or Family-Skeleton-Dataset-vN m = _RESOLVE_FULL_PATTERN.match(name) if m: fam_raw, skel_raw, ds_raw, ver_num = m.groups() fam = _normalize_family(fam_raw) skel = _normalize_skeleton(skel_raw) ds = _normalize_dataset(ds_raw) if fam is not None and skel is not None and ds is not None: if ver_num is not None: version = f"v{ver_num}" for info in MODEL_INFOS: if info.family == fam and info.skeleton == skel and info.dataset == ds and info.version == version: return info.short_key else: info = _get_latest_for_family_skeleton_dataset(fam, skel, ds) if info is not None: return info.short_key # Parsed partial: Skeleton-Dataset, Skeleton, or Dataset (use default_family) if default_family is not None: m = _RESOLVE_PARTIAL_PATTERN.match(name) if m: tok1, ds_raw, ver_num = m.groups() fam = _normalize_family(default_family) if fam is not None: skel = _normalize_skeleton(tok1) ds_candidate = _normalize_dataset(ds_raw) if ds_raw else None if skel is not None and ds_candidate is not None: ds = ds_candidate elif skel is not None: ds = "RP" else: skel = "SOMA" ds = _normalize_dataset(tok1) if tok1 else "RP" if ds is None: ds = "RP" if ver_num is not None: version = f"v{ver_num}" for info in MODEL_INFOS: if ( info.family == fam and info.skeleton == skel 
and info.dataset == ds and info.version == version ): return info.short_key else: info = _get_latest_for_family_skeleton_dataset(fam, skel, ds) if info is not None: return info.short_key # Single token: skeleton or dataset fam = _normalize_family(default_family) if fam is not None: skel = _normalize_skeleton(name) if skel is not None: info = _get_latest_for_family_skeleton_dataset(fam, skel, "RP") if info is not None: return info.short_key ds = _normalize_dataset(name) if ds is not None: info = _get_latest_for_family_skeleton_dataset(fam, "SOMA", ds) if info is not None: return info.short_key raise ValueError( f"Model name {name!r} could not be resolved. " f"Use a short key (e.g. {list(MODEL_NAMES.keys())[:3]}...), " "a full name (e.g. Kimodo-SOMA-RP-v1), or a partial (e.g. SOMA-RP, SOMA) " "with default_family set." ) ================================================ FILE: kimodo/model/text_encoder_api.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Remote text encoder API client (Gradio) for motion generation.""" import logging import numpy as np import torch from gradio_client import Client # Suppress the [httpx] logs (GET requests) logging.getLogger("httpx").setLevel(logging.WARNING) # Suppress internal gradio_client logs logging.getLogger("gradio_client").setLevel(logging.WARNING) class TextEncoderAPI: """Text encoder API client for motion generation.""" def __init__(self, url: str): self.client = Client(url, verbose=False) self.device = "cpu" self.dtype = torch.float def _create_np_random_name(self): import uuid return str(uuid.uuid4()) + ".npy" def to(self, device=None, dtype=None): if device is not None: self.device = device if dtype is not None: self.dtype = dtype return self def __call__(self, texts): """Encode text prompts into tensors. Args: texts (str | list[str]): text prompts to encode Returns: tuple[torch.Tensor, list[int]]: encoded text tensors and their lengths """ if isinstance(texts, str): texts = [texts] tensors = [] lengths = [] for text in texts: filename = self._create_np_random_name() result = self.client.predict( text=text, filename=filename, api_name="/DemoWrapper", ) path = result[0]["value"] tensor = np.load(path) length = tensor.shape[0] tensors.append(tensor) lengths.append(length) padded_tensor = np.zeros((len(lengths), max(lengths), tensors[0].shape[-1]), dtype=tensors[0].dtype) for idx, (tensor, length) in enumerate(zip(tensors, lengths)): padded_tensor[idx, :length] = tensor padded_tensor = torch.from_numpy(padded_tensor) padded_tensor = padded_tensor.to(device=self.device, dtype=self.dtype) return padded_tensor, lengths ================================================ FILE: kimodo/model/tmr.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """TMR model: encoder, and text-to-motion retrieval head.""" import contextlib from pathlib import Path from typing import Dict, List, Optional, Tuple import torch import torch.nn as nn from einops import repeat from torch import Tensor from kimodo.model import load_checkpoint_state_dict from kimodo.motion_rep.feature_utils import length_to_mask from kimodo.sanitize import sanitize_texts from kimodo.skeleton import SkeletonBase, build_skeleton from kimodo.tools import ensure_batched class PositionalEncoding(nn.Module): """Sinusoidal positional encoding for sequences (batch_first optional).""" def __init__(self, d_model, dropout=0.1, max_len=5000, batch_first=False) -> None: super().__init__() self.batch_first = batch_first self.dropout = nn.Dropout(p=dropout) pe = torch.zeros(max_len, d_model) position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) # Note: have to replace torch.exp() and math.log() with torch.pow() # due to MKL exp() and ln() throws floating point exceptions on certain CPUs div_term = torch.pow(10000.0, -torch.arange(0, d_model, 2).float() / d_model) # div_term = torch.exp( # torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model) # ) pe[:, 0::2] = torch.sin(position * div_term) pe[:, 1::2] = torch.cos(position * div_term) pe = pe.unsqueeze(0).transpose(0, 1) self.register_buffer("pe", pe, persistent=False) def forward(self, x: Tensor) -> Tensor: if self.batch_first: x = x + self.pe.permute(1, 0, 2)[:, : x.shape[1], :] else: x = x + self.pe[: x.shape[0], :] return self.dropout(x) def load_ckpt(self, ckpt_path): """Load model weights from checkpoint path.""" state_dict = load_checkpoint_state_dict(ckpt_path) self.load_state_dict(state_dict) class ACTORStyleEncoder(nn.Module): """Motion encoder in ACTOR style: optional motion_rep projection, VAE/MLP tokens, transformer.""" def __init__( self, motion_rep: Optional[nn.Module], llm_shape: Optional[Tuple], vae: bool, latent_dim: int = 256, ff_size: int = 1024, num_layers: int = 4, num_heads: int = 4, dropout: float = 0.1, activation: str = "gelu", ckpt_path: Optional[str] = None, ) -> None: super().__init__() self.motion_rep = motion_rep if motion_rep is not None and llm_shape is None: nfeats = motion_rep.motion_rep_dim elif motion_rep is None and llm_shape is not None: nfeats = llm_shape[-1] else: raise ValueError self.nfeats = nfeats self.projection = nn.Linear(nfeats, latent_dim) self.vae = vae self.nbtokens = 2 if vae else 1 self.tokens = nn.Parameter(torch.randn(self.nbtokens, latent_dim)) self.sequence_pos_encoding = PositionalEncoding(latent_dim, dropout=dropout, batch_first=True) seq_trans_encoder_layer = nn.TransformerEncoderLayer( d_model=latent_dim, nhead=num_heads, dim_feedforward=ff_size, dropout=dropout, activation=activation, batch_first=True, ) self.seqTransEncoder = nn.TransformerEncoder( seq_trans_encoder_layer, num_layers=num_layers, enable_nested_tensor=False, ) if ckpt_path is not None: load_ckpt(self, ckpt_path) def forward(self, x_dict: Dict) -> Tensor: x = x_dict["x"] mask = x_dict["mask"] x = self.projection(x) device = x.device bs = len(x) tokens = repeat(self.tokens, "nbtoken dim -> bs nbtoken dim", bs=bs) xseq = torch.cat((tokens, x), 1) token_mask = torch.ones((bs, self.nbtokens), dtype=bool, device=device) aug_mask = torch.cat((token_mask, mask), 1) # add positional encoding xseq = self.sequence_pos_encoding(xseq) final = self.seqTransEncoder(xseq, src_key_padding_mask=~aug_mask) return final[:, : self.nbtokens] class TMR(nn.Module): 
r"""TMR: Text-to-Motion Retrieval inference code (no decoder) Find more information about the model on the following website: https://mathis.petrovich.fr/tmr """ @classmethod def from_args( cls, motion_rep: nn.Module, llm_shape: tuple | list, vae: bool, latent_dim: int = 256, ff_size: int = 1024, num_layers: int = 4, num_heads: int = 4, dropout: float = 0.1, activation: str = "gelu", ckpt_folder: Optional[str] = None, device: Optional[str] = None, **kwargs, ): motion_encoder, top_text_encoder = None, None motion_encoder = ACTORStyleEncoder( motion_rep=motion_rep, llm_shape=None, vae=vae, latent_dim=latent_dim, ff_size=ff_size, num_layers=num_layers, num_heads=num_heads, dropout=dropout, activation=activation, ckpt_path=Path(ckpt_folder) / "motion_encoder.pt", ).to(device) top_text_encoder = ACTORStyleEncoder( motion_rep=None, llm_shape=llm_shape, vae=vae, latent_dim=latent_dim, ff_size=ff_size, num_layers=num_layers, num_heads=num_heads, dropout=dropout, activation=activation, ckpt_path=Path(ckpt_folder) / "text_encoder.pt", ).to(device) return cls( motion_encoder, top_text_encoder, vae, device=device, **kwargs, ) def __init__( self, motion_encoder: nn.Module, top_text_encoder: nn.Module, vae: bool, text_encoder: Optional = None, fact: Optional[float] = None, sample_mean: Optional[bool] = True, unit_vector: Optional[bool] = False, compute_grads: bool = False, device: Optional[str] = None, ) -> None: super().__init__() self.motion_encoder = motion_encoder self.text_encoder = top_text_encoder self.raw_text_encoder = text_encoder self.motion_rep = None self.skeleton = None if self.motion_encoder is not None: self.motion_rep = self.motion_encoder.motion_rep if self.motion_rep is not None: self.skeleton = self.motion_rep.skeleton self.compute_grads = compute_grads self.device = device # sampling parameters self.vae = vae self.fact = fact if fact is not None else 1.0 self.sample_mean = sample_mean self.unit_vector = unit_vector def full_text_encoder(self, texts: list[str]): assert isinstance(texts, list), "The input should be batched." 
# sanitize the texts first # then encode the text, and then use the top text encoder texts = sanitize_texts(texts) text_feat, text_length = self.raw_text_encoder(texts) if isinstance(text_length, list): text_length = torch.tensor(text_length, device=self.device) else: text_length = text_length.to(self.device) inputs = { "x": text_feat.to(self.device), "mask": length_to_mask(text_length, device=self.device), } return self.text_encoder(inputs) def _find_encoder(self, inputs, modality): assert modality in ["text", "motion", "raw_text", "auto"] if modality == "text": return self.text_encoder elif modality == "motion": return self.motion_encoder elif modality == "raw_text": return self.full_text_encoder if isinstance(inputs[0], str): return self.full_text_encoder m_nfeats = self.motion_encoder.nfeats t_nfeats = self.text_encoder.nfeats if m_nfeats == t_nfeats: raise ValueError("Cannot automatically find the encoder, as they share the same input space.") nfeats = inputs["x"].shape[-1] if nfeats == m_nfeats: return self.motion_encoder elif nfeats == t_nfeats: return self.text_encoder else: raise ValueError("The input is not recognized.") def _encode( self, inputs, modality: str = "auto", sample_mean: Optional[bool] = None, fact: Optional[float] = None, return_distribution: bool = False, unit_vector: Optional[bool] = None, ): sample_mean = self.sample_mean if sample_mean is None else sample_mean fact = self.fact if fact is None else fact unit_vector = self.unit_vector if unit_vector is None else unit_vector # Encode the inputs encoder = self._find_encoder(inputs, modality) encoded = encoder(inputs) # Sampling if self.vae: dists = encoded.unbind(1) mu, logvar = dists if sample_mean: latent_vectors = mu else: # Reparameterization trick std = logvar.exp().pow(0.5) eps = torch.randn_like(std) latent_vectors = mu + fact * eps * std else: dists = None (latent_vectors,) = encoded.unbind(1) if unit_vector: latent_vectors = torch.nn.functional.normalize(latent_vectors, dim=-1) if return_distribution: return latent_vectors, dists return latent_vectors @ensure_batched(posed_joints=4, lengths=1) def encode_motion( self, posed_joints: torch.Tensor, original_skeleton: Optional[SkeletonBase] = None, lengths: Optional[torch.Tensor] = None, unit_vector: Optional[bool] = None, ): # TODO here. convert_ctx = torch.no_grad() if not self.compute_grads else contextlib.nullcontext() if original_skeleton is None: original_skeleton = build_skeleton(posed_joints.shape[-2]) if lengths is None: nbatch, nbframes = posed_joints.shape[:2] device = posed_joints.device assert nbatch == 1, "If lengths is not provided, the input should not be batched."
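# A single unbatched clip: its only valid length is the full frame count.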
lengths = torch.tensor([nbframes], device=device) # slice the posed joints if we use fewer joints skel_slice = self.motion_rep.skeleton.get_skel_slice(original_skeleton) posed_joints = posed_joints[..., skel_slice, :] with convert_ctx: features = self.motion_rep( posed_joints=posed_joints, to_canonicalize=True, to_normalize=True, lengths=lengths, ) mask = length_to_mask(lengths, device=features.device) x_dict = {"x": features, "mask": mask} latent_vectors = self._encode( x_dict, modality="motion", unit_vector=unit_vector, ) return latent_vectors def encode_text( self, x_dict: Dict, unit_vector: Optional[bool] = None, ): # TODO: make it ensure batched convert_ctx = torch.no_grad() if not self.compute_grads else contextlib.nullcontext() with convert_ctx: latent_vectors = self._encode( x_dict, modality="text", unit_vector=unit_vector, ) return latent_vectors def encode_raw_text( self, texts: List[str], unit_vector: Optional[bool] = None, ): is_batched = True if isinstance(texts, str): is_batched = False texts = [texts] convert_ctx = torch.no_grad() if not self.compute_grads else contextlib.nullcontext() with convert_ctx: latent_vectors = self._encode( texts, modality="raw_text", unit_vector=unit_vector, ) if not is_batched: latent_vectors = latent_vectors[0] return latent_vectors ================================================ FILE: kimodo/model/twostage_denoiser.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Two-stage transformer denoiser: root stage then body stage for motion diffusion.""" import contextlib from typing import Optional import torch from torch import nn from .backbone import TransformerEncoderBlock from .loading import load_checkpoint_state_dict class TwostageDenoiser(nn.Module): """Two-stage denoiser: first predicts global root features, then body features conditioned on local root.""" def __init__( self, motion_rep, motion_mask_mode, ckpt_path: Optional[str] = None, **kwargs, ): """Build root and body transformer blocks; optionally load checkpoint from ckpt_path.""" super().__init__() self.motion_rep = motion_rep self.motion_mask_mode = motion_mask_mode # it should be a dual motion_rep # and be global by default # global motion_rep as input input_dim = motion_rep.motion_rep_dim will_concatenate = motion_mask_mode == "concat" # stage 1: root only root_input_dim = input_dim * 2 if will_concatenate else input_dim root_output_dim = motion_rep.global_root_dim self.root_model = TransformerEncoderBlock( input_dim=root_input_dim, output_dim=root_output_dim, skeleton=self.motion_rep.skeleton, **kwargs, ) # replace the global root by the local root local_motion_rep_dim = input_dim - motion_rep.global_root_dim + motion_rep.local_root_dim # stage 2: local body body_input_dim = local_motion_rep_dim + ( input_dim if will_concatenate else 0 ) # body stage always takes in local root info for motion (but still the global mask) body_output_dim = input_dim - motion_rep.global_root_dim self.body_model = TransformerEncoderBlock( input_dim=body_input_dim, output_dim=body_output_dim, skeleton=self.motion_rep.skeleton, **kwargs, ) if ckpt_path: self.load_ckpt(ckpt_path) def load_ckpt(self, ckpt_path: str) -> None: """Load checkpoint from path; state dict keys are stripped of 'denoiser.backbone.'
prefix.""" state_dict = load_checkpoint_state_dict(ckpt_path) state_dict = {key.replace("denoiser.backbone.", ""): val for key, val in state_dict.items()} self.load_state_dict(state_dict) def forward( self, x: torch.Tensor, x_pad_mask: torch.Tensor, text_feat: torch.Tensor, text_feat_pad_mask: torch.Tensor, timesteps: torch.Tensor, first_heading_angle: Optional[torch.Tensor] = None, motion_mask: Optional[torch.Tensor] = None, observed_motion: Optional[torch.Tensor] = None, ) -> torch.Tensor: """ Args: x (torch.Tensor): [B, T, dim_motion] current noisy motion x_pad_mask (torch.Tensor): [B, T] attention mask, positions with True are allowed to attend, False are not text_feat (torch.Tensor): [B, max_text_len, llm_dim] embedded text prompts text_feat_pad_mask (torch.Tensor): [B, max_text_len] attention mask, positions with True are allowed to attend, False are not timesteps (torch.Tensor): [B,] current denoising step motion_mask observed_motion Returns: torch.Tensor: same size as input x """ if self.motion_mask_mode == "concat": if motion_mask is None or observed_motion is None: motion_mask = torch.zeros_like(x) observed_motion = torch.zeros_like(x) x = x * (1 - motion_mask) + observed_motion * motion_mask x_extended = torch.cat([x, motion_mask], axis=-1) else: x_extended = x # Stage 1: predict root motion in global root_motion_pred = self.root_model( x_extended, x_pad_mask, text_feat, text_feat_pad_mask, timesteps, first_heading_angle, ) # [B, T, 5] # Maybe pass this as argument instead of recomputing it lengths = x_pad_mask.sum(-1) # Convert root pred to local rep # At test-time want to allow gradient through for guidance convert_ctx = torch.no_grad() if self.training else contextlib.nullcontext() with convert_ctx: root_motion_local = self.motion_rep.global_root_to_local_root( root_motion_pred, normalized=True, lengths=lengths, ) if self.training: root_motion_local = root_motion_local.detach() # concatenate the predicted local root with the body motion body_x = x[..., self.motion_rep.body_slice] x_new = torch.cat([root_motion_local, body_x], axis=-1) if self.motion_mask_mode == "concat": x_new_extended = torch.cat([x_new, motion_mask], axis=-1) else: x_new_extended = x_new # Stage 2: predict local body motion based on local root predicted_body = self.body_model( x_new_extended, x_pad_mask, text_feat, text_feat_pad_mask, timesteps, first_heading_angle, ) # concatenate the predicted local body with the predicted root output = torch.cat([root_motion_pred, predicted_body], axis=-1) return output ================================================ FILE: kimodo/motion_rep/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Motion representation utilities.""" from .reps import KimodoMotionRep, MotionRepBase, TMRMotionRep __all__ = [ "MotionRepBase", "KimodoMotionRep", "TMRMotionRep", ] ================================================ FILE: kimodo/motion_rep/conditioning.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Constraint conditioning: build index and data dicts from constraint sets for the denoiser.""" from collections import defaultdict import torch def build_condition_dicts(constraints_lst: list): index_dict = defaultdict(list) data_dict = defaultdict(list) for constraint in constraints_lst: constraint.update_constraints(data_dict, index_dict) return index_dict, data_dict def get_unique_index_and_data(indices_lst, data): # unique + sort them by t indices_unique, inverse = torch.unique(indices_lst, dim=0, return_inverse=True) # pick first value for each unique (t, j) first_idx = torch.zeros(indices_unique.size(0), dtype=torch.long, device=inverse.device) first_idx.scatter_(0, inverse, torch.arange(len(inverse), device=inverse.device)) assert (indices_lst[first_idx] == indices_unique).all() # get the data indices_lst = indices_lst[first_idx] data = data[first_idx] return indices_lst, data ================================================ FILE: kimodo/motion_rep/feature_utils.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Motion representation helpers: velocity, heading, masks, and rotation of features.""" from typing import List, Optional, Union import einops import torch from kimodo.geometry import cont6d_to_matrix, matrix_to_cont6d from kimodo.skeleton import SkeletonBase from kimodo.tools import ensure_batched def diff_angles(angles: torch.Tensor, fps: float) -> torch.Tensor: """Compute frame-to-frame angular differences in radians, scaled by fps. Args: angles: [..., T] batched sequences of rotation angles in radians. fps: Sampling rate used to convert frame differences to per-second rate. Returns: [..., T-1] difference between consecutive angles (rad/s). """ cos = torch.cos(angles) sin = torch.sin(angles) cos_diff = cos[..., 1:] * cos[..., :-1] + sin[..., 1:] * sin[..., :-1] sin_diff = sin[..., 1:] * cos[..., :-1] - cos[..., 1:] * sin[..., :-1] # should be close to angles.diff() but more robust # multiply by fps = 1 / dt angles_diff = fps * torch.arctan2(sin_diff, cos_diff) return angles_diff @ensure_batched(positions=4, lengths=1) def compute_vel_xyz( positions: torch.Tensor, fps: float, lengths: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Compute the velocities from positions: dx/dt. Works with batches. The last velocity is duplicated to keep the same size. Args: positions (torch.Tensor): [..., T, J, 3] xyz positions of a human skeleton fps (float): frames per second lengths (Optional[torch.Tensor]): [...] valid length of each batched sequence. If not provided, positions should not be batched Returns: velocity (torch.Tensor): [..., T, J, 3] velocities computed from the positions """ device = positions.device if lengths is None: assert positions.shape[0] == 1, "If lengths is not provided, the input should not be batched."
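# After ensure_batched, positions is [1, T, J, 3]; the single valid length is the frame count T.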
lengths = torch.tensor([positions.shape[1]], device=device) # useful for indexing range_len = torch.arange(len(lengths)) # compute velocities with fps velocity = fps * (positions[:, 1:] - positions[:, :-1]) # padding the velocity vector vel_pad = torch.zeros_like(velocity[:, 0]) velocity, _ = einops.pack([velocity, vel_pad], "batch * nbjoints dim") # repeat the last velocities # with special care for different lengths with batches velocity[(range_len, lengths - 1)] = velocity[(range_len, lengths - 2)] return velocity @ensure_batched(root_rot_angles=2, lengths=1) def compute_vel_angle( root_rot_angles: torch.Tensor, fps: float, lengths: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Compute the local root rotation velocity: dtheta/dt. Args: root_rot_angles (torch.Tensor): [..., T] rotation angle (in radians) fps (float): frames per second lengths (Optional[torch.Tensor]): [...] valid length of each batched sequence. If not provided, root_rot_angles should not be batched Returns: local_root_rot_vel (torch.Tensor): [..., T] local root rotation velocity (in radians/s) """ device = root_rot_angles.device if lengths is None: assert root_rot_angles.shape[0] == 1, "If lengths is not provided, the input should not be batched." lengths = torch.tensor([root_rot_angles.shape[1]], device=device) # useful for indexing range_len = torch.arange(len(lengths)) local_root_rot_vel = diff_angles(root_rot_angles, fps) pad_rot_vel_angles = torch.zeros_like(root_rot_angles[:, 0]) local_root_rot_vel, _ = einops.pack( [local_root_rot_vel, pad_rot_vel_angles], "batch *", ) # repeat the last rotation velocity # with special care for different lengths with batches local_root_rot_vel[(range_len, lengths - 1)] = local_root_rot_vel[(range_len, lengths - 2)] return local_root_rot_vel @ensure_batched(posed_joints=4) def compute_heading_angle(posed_joints: torch.Tensor, skeleton: SkeletonBase) -> torch.Tensor: """Compute the heading direction from joint positions using the hip vector. Args: posed_joints: [B, T, J, 3] global joint positions. skeleton: Skeleton instance used to get hip joint indices. Returns: [B, T] heading angle in radians (one angle per frame). """ # compute root heading for the sequence from hip positions r_hip, l_hip = skeleton.hip_joint_idx diff = posed_joints[:, :, r_hip] - posed_joints[:, :, l_hip] heading_angle = torch.atan2(diff[..., 2], -diff[..., 0]) return heading_angle def length_to_mask( length: Union[torch.Tensor, List], max_len: Optional[int] = None, device=None, ) -> torch.Tensor: """Convert sequence lengths to a boolean validity mask. Args: length: Sequence lengths, either a tensor ``[B]`` or a Python list. max_len: Optional mask width. If omitted, uses ``max(length)``. device: Optional device. When ``length`` is a list, this controls where the new tensor is created. Returns: A boolean tensor of shape ``[B, max_len]`` where ``True`` marks valid timesteps. """ if isinstance(length, list): if device is None: device = "cpu" length = torch.tensor(length, device=device) # Use requested device for output; move length if needed so mask and length match if device is not None: target = torch.device(device) if length.device != target: length = length.to(target) device = length.device if max_len is None: max_len = max(length) mask = torch.arange(max_len, device=device).expand(len(length), max_len) < length.unsqueeze(1) return mask class RotateFeatures: """Helper that applies a global heading rotation to motion features.""" def __init__(self, angle: torch.Tensor): """Precompute 2D and 3D rotation matrices for a batch of angles.
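A usage sketch (names are illustrative): ``rf = RotateFeatures(angle)`` with ``angle`` of shape ``[B]``; ``rf.rotate_positions(points)`` then rotates ``[B, T, J, 3]`` points about the +Y axis, following the ``x' = R x`` convention noted in ``rotate_rotations``.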
Args: angle: Rotation angle(s) in radians, shaped ``[B]``. """ self.angle = angle # Create the necessary rotation matrices cos, sin = torch.cos(angle), torch.sin(angle) one, zero = torch.ones_like(angle), torch.zeros_like(angle) # 2D rotation, transposed (the signs of sin are flipped) self.corrective_mat_2d_T = torch.stack((cos, sin, -sin, cos), -1).reshape(angle.shape + (2, 2)) # 3D rotation on Y axis self.corrective_mat_Y = torch.stack((cos, zero, sin, zero, one, zero, -sin, zero, cos), -1).reshape( angle.shape + (3, 3) ) self.corrective_mat_Y_T = self.corrective_mat_Y.transpose(-2, -1).contiguous() def rotate_positions(self, positions: torch.Tensor): """Rotate 3D positions around the Y axis.""" return positions @ self.corrective_mat_Y_T def rotate_2d_positions(self, positions_2d: torch.Tensor): """Rotate 2D ``(x, z)`` vectors in the ground plane.""" return positions_2d @ self.corrective_mat_2d_T def rotate_rotations(self, rotations: torch.Tensor): """Left-multiply global rotation matrices by the heading correction.""" # "Rotate" the global rotations # which means adding an extra Y rotation after the transform # so at the left: R' = R_y R # (since we use the convention x' = R x) # "bik,btdkj->btdij" B, T, J = rotations.shape[:3] BTJ = B * T * J return ( self.corrective_mat_Y[:, None, None].expand(B, T, J, 3, 3).reshape(BTJ, 3, 3) @ rotations.reshape(BTJ, 3, 3) ).reshape(B, T, J, 3, 3) def rotate_6d_rotations(self, rotations_6d: torch.Tensor): """Rotate 6D rotation features via matrix conversion.""" return matrix_to_cont6d(self.rotate_rotations(cont6d_to_matrix(rotations_6d))) ================================================ FILE: kimodo/motion_rep/feet.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Foot contact detection from joint positions and velocities.""" import torch from ..tools import ensure_batched @ensure_batched(positions=4, velocity=4) def foot_detect_from_pos_and_vel( positions: torch.Tensor, velocity: torch.Tensor, skeleton, vel_thres: float, height_thresh: float, ) -> torch.Tensor: """Compute foot contact labels using heuristics combining joint height and velocities. Args: positions (torch.Tensor): [X, T, J, 3] global joint positions velocity (torch.Tensor): [X, T, J, 3] velocities (already padded correctly), already multiplied by 1 / dt skeleton: Skeleton instance used to get the foot joint indices vel_thres (float): threshold for joint velocity height_thresh (float): threshold for joint height Returns: torch.Tensor: [X, T, 4] contact labels for left and right foot joints (heel/toe order follows the skeleton joint index definition), where ``1`` denotes contact. """ device = positions.device # Use at most 2 foot joints per side (ankle + toe); SOMA77 defines a # third end-effector (ToeEnd) that SOMA30 and other skeletons omit.
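# Heuristic sketch: a foot joint counts as "in contact" on a frame when both its
# speed and its height fall below the thresholds; callers in this repository pass
# vel_thres=0.15 and height_thresh=0.10 (see kimodo_motionrep.py and tmr_motionrep.py).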
fid_l = skeleton.left_foot_joint_idx[:2] fid_r = skeleton.right_foot_joint_idx[:2] velfactor, heightfactor = ( torch.tensor([vel_thres, vel_thres], device=device), torch.tensor([height_thresh, height_thresh], device=device), ) feet_l_v = torch.linalg.norm(velocity[:, :, fid_l], axis=-1) feet_l_h = positions[:, :, fid_l, 1] feet_l = torch.logical_and( feet_l_v < velfactor, feet_l_h < heightfactor, ).to(positions.dtype) feet_r_v = torch.linalg.norm(velocity[:, :, fid_r], axis=-1) feet_r_h = positions[:, :, fid_r, 1] feet_r = torch.logical_and( feet_r_v < velfactor, feet_r_h < heightfactor, ).to(positions.dtype) foot_contacts = torch.cat((feet_l, feet_r), axis=-1) return foot_contacts ================================================ FILE: kimodo/motion_rep/reps/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Motion representation implementations: base, Kimodo, and TMR.""" from .base import MotionRepBase from .kimodo_motionrep import KimodoMotionRep from .tmr_motionrep import TMRMotionRep __all__ = [ "MotionRepBase", "KimodoMotionRep", "TMRMotionRep", ] ================================================ FILE: kimodo/motion_rep/reps/base.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Base motion representation: feature layout, normalization, and conditioning helpers.""" import os from typing import Optional import einops import numpy as np import torch from einops import repeat from ...tools import ensure_batched from ..conditioning import build_condition_dicts from ..feature_utils import compute_vel_angle, compute_vel_xyz from ..stats import Stats def _require_split_stats_layout(stats_path: str) -> None: """Raise if stats_path does not contain the required global_root, local_root, body subdirs.""" subdirs = ("global_root", "local_root", "body") missing = [] for name in subdirs: subpath = os.path.join(stats_path, name) mean_path = os.path.join(subpath, "mean.npy") if not os.path.isfile(mean_path): missing.append(f"{subpath}/ (mean.npy)") if missing: raise FileNotFoundError( f"Checkpoint stats must use the split layout with subfolders " f"global_root/, local_root/, and body/ under '{stats_path}'. " f"Missing or incomplete: {', '.join(missing)}. " ) class MotionRepBase: """Base class for motion representations used in generation and conditioning. Subclasses define: - ``size_dict``: feature blocks and their shapes, - ``last_root_feature``: last entry of the root block, - ``local_root_size_dict``: local-root feature layout, and implement transform-specific methods such as ``__call__``, ``inverse``, ``rotate``, ``translate_2d`` and ``create_conditions``. 
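Example (an illustrative sketch; ``rep`` is a subclass instance and the inputs
follow the shapes documented in its ``__call__``):

    >>> feats = rep(local_joint_rots, root_positions, to_normalize=True, lengths=lengths)
    >>> feats.shape                             # [B, T, rep.motion_rep_dim]
    >>> root_feats = feats[..., rep.root_slice] # global-root block
    >>> body_feats = feats[..., rep.body_slice] # remaining body features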
""" def __init__( self, skeleton, fps, stats_path: Optional[str] = None, ): """Initialize feature slicing metadata and optional normalization stats.""" self.skeleton = skeleton self.fps = fps self.nbjoints = skeleton.nbjoints self.feature_names = list(self.size_dict.keys()) self.ps = list(self.size_dict.values()) self.nfeats_dict = {key: val.numel() for key, val in self.size_dict.items()} feats_cumsum = np.cumsum([0] + list(self.nfeats_dict.values())).tolist() self.slice_dict = {key: slice(feats_cumsum[i], feats_cumsum[i + 1]) for i, key in enumerate(self.feature_names)} self.motion_rep_dim = sum(self.nfeats_dict.values()) self.root_slice = slice(0, self.slice_dict[self.last_root_feature].stop) self.body_slice = slice(self.root_slice.stop, self.motion_rep_dim) self.body_dim = self.body_slice.stop - self.body_slice.start self.global_root_dim = self.root_slice.stop self.local_root_dim = sum(val.numel() for val in self.local_root_size_dict.values()) if stats_path: _require_split_stats_layout(stats_path) self.global_root_stats = Stats(os.path.join(stats_path, "global_root")) self.local_root_stats = Stats(os.path.join(stats_path, "local_root")) self.body_stats = Stats(os.path.join(stats_path, "body")) # Global stats mean = torch.cat([self.global_root_stats.mean, self.body_stats.mean]) std = torch.cat([self.global_root_stats.std, self.body_stats.std]) assert len(mean) == len(std) == self.motion_rep_dim, "There is an stat issue." self.stats = Stats() self.stats.register_from_tensors(mean, std) def get_root_pos(self, features: torch.Tensor, fallback_to_smooth: bool = True): """Extract root positions from a feature tensor. Supports both ``root_pos`` and ``smooth_root_pos`` representations. """ if "root_pos" in self.slice_dict: return features[..., self.slice_dict["root_pos"]] if "smooth_root_pos" not in self.slice_dict: raise TypeError("This motion rep should have either a root_pos or smooth_root_pos field") if fallback_to_smooth: return features[:, :, self.slice_dict["smooth_root_pos"]] # else compute the root pos from the smooth root and local joints offset smooth_root_pos = features[:, :, self.slice_dict["smooth_root_pos"]].clone() local_joints_positions_flatten = features[..., self.slice_dict["local_joints_positions"]] hips_offset = local_joints_positions_flatten[..., self.skeleton.root_idx : self.skeleton.root_idx + 3] root_pos = torch.stack( [ smooth_root_pos[..., 0] + hips_offset[..., 0], smooth_root_pos[..., 1], smooth_root_pos[..., 2] + hips_offset[..., 2], ], axis=-1, ) return root_pos @ensure_batched(root_features=3, lengths=1) def global_root_to_local_root( self, root_features: torch.Tensor, normalized: bool, lengths: Optional[torch.Tensor], ): """Convert global root features to local-root motion features. Args: root_features: Root feature tensor containing root position and global heading, shaped ``[B, T, D_root]``. normalized: Whether ``root_features`` are normalized. lengths: Optional valid lengths per sequence. Returns: Tensor ``[B, T, 4]`` with local root rotational velocity, planar velocity, and global root height. 
""" if normalized: root_features = self.global_root_stats.unnormalize(root_features) [root_pos, global_root_heading] = einops.unpack(root_features, self.ps[:2], "batch time *") cos, sin = global_root_heading.unbind(-1) heading_angle = torch.arctan2(sin, cos) local_root_rot_vel = compute_vel_angle(heading_angle, self.fps, lengths=lengths) local_root_vel = compute_vel_xyz( root_pos[..., None, :], self.fps, lengths=lengths, )[..., 0, [0, 2]] global_root_y = root_pos[..., 1] local_root_motion = torch.cat( [ local_root_rot_vel[..., None], local_root_vel, global_root_y[..., None], ], axis=-1, ) if normalized: local_root_motion = self.local_root_stats.normalize(local_root_motion) return local_root_motion def get_root_heading_angle(self, features: torch.Tensor) -> torch.Tensor: """Compute root heading angle from cosine/sine heading features.""" global_root_heading = features[:, :, self.slice_dict["global_root_heading"]] cos, sin = global_root_heading.unbind(-1) return torch.arctan2(sin, cos) @ensure_batched(features=3) def rotate_to( self, features: torch.Tensor, target_angle: torch.Tensor, return_delta_angle=False, ): """Rotate each sequence so frame-0 heading matches ``target_angle``.""" # rotate so that the first frame angle is the target # it put the motion_rep to the angle current_first_angle = self.get_root_heading_angle(features)[:, 0] delta_angle = target_angle - current_first_angle rotated_features = self.rotate(features, delta_angle) if return_delta_angle: return rotated_features, delta_angle return rotated_features @ensure_batched(features=3) def rotate_to_zero( self, features: torch.Tensor, return_delta_angle=False, ): """Rotate each sequence so frame-0 heading becomes zero.""" target_angle = torch.zeros(len(features), device=features.device) return self.rotate_to(features, target_angle, return_delta_angle=return_delta_angle) @ensure_batched(features=3) def randomize_first_heading( self, features: torch.Tensor, return_delta_angle=False, ) -> torch.Tensor: """Rotate each sequence to a random frame-0 heading.""" target_heading_angle = torch.rand(features.shape[0]) * 2 * np.pi return self.rotate_to( features, target_heading_angle, return_delta_angle=return_delta_angle, ) @ensure_batched(features=3, target_2d_pos=2) def translate_2d_to( self, features: torch.Tensor, target_2d_pos: torch.Tensor, return_delta_pos: bool = False, ) -> torch.Tensor: """Translate each sequence so frame-0 root ``(x, z)`` matches a target.""" root_pos = self.get_root_pos(features) current_first_2d_pos = root_pos[:, 0, [0, 2]].clone() delta_2d_pos = target_2d_pos - current_first_2d_pos translated_features = self.translate_2d(features, delta_2d_pos) if return_delta_pos: return translated_features, delta_2d_pos return translated_features @ensure_batched(features=3) def translate_2d_to_zero( self, features: torch.Tensor, return_delta_pos: bool = False, ) -> torch.Tensor: """Translate each sequence so frame-0 root ``(x, z)`` is at the origin.""" target_2d_pos = torch.zeros(len(features), 2, device=features.device) return self.translate_2d_to(features, target_2d_pos, return_delta_pos=return_delta_pos) @ensure_batched(features=3) def canonicalize(self, features: torch.Tensor, normalized: bool = False): """Canonicalize heading and planar position at frame 0.""" if normalized: features = self.unnormalize(features) rotated_features = self.rotate_to_zero(features) canonicalized_features = self.translate_2d_to_zero(rotated_features) if normalized: canonicalized_features = self.normalize(canonicalized_features) return 
canonicalized_features def normalize(self, features): """Normalize features.""" return self.stats.normalize(features) def unnormalize(self, features): """Undo feature normalization.""" return self.stats.unnormalize(features) def create_conditions_from_constraints( self, constraints_lst: list, length: int, to_normalize: bool, device: str, ): """Create a conditioning tensor and mask from constraint objects.""" index_dict, data_dict = build_condition_dicts(constraints_lst) return self.create_conditions(index_dict, data_dict, length, to_normalize, device) def create_conditions_from_constraints_batched( self, constraints_lst: list | list[list], lengths: torch.Tensor, to_normalize: bool, device: str, ): """Batched version of ``create_conditions_from_constraints``. Supports either one shared constraint list for all batch elements, or a per-sample list of constraint lists. """ num_samples = len(lengths) if not constraints_lst or not isinstance(constraints_lst[0], list): # If no constraints, or constraints are shared across the batch, # build once and repeat. observed_motion, motion_mask = self.create_conditions_from_constraints( constraints_lst, int(lengths.max()), to_normalize, device ) observed_motion = repeat(observed_motion, "t d -> b t d", b=num_samples) motion_mask = repeat(motion_mask, "t d -> b t d", b=num_samples) return observed_motion, motion_mask length = int(lengths.max()) observed_motion_lst = [] motion_mask_lst = [] for constraints_lst_el in constraints_lst: observed_motion, motion_mask = self.create_conditions_from_constraints( constraints_lst_el, length, to_normalize, device, ) observed_motion_lst.append(observed_motion) motion_mask_lst.append(motion_mask) observed_motion = torch.stack(observed_motion_lst, axis=0) motion_mask = torch.stack(motion_mask_lst, axis=0) return observed_motion, motion_mask ================================================ FILE: kimodo/motion_rep/reps/kimodo_motionrep.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """Kimodo motion representation: smooth-root global features (local joint positions, 6D global rotations, velocities, and foot contacts).""" from typing import Optional import einops import torch from torch import Tensor from kimodo.tools import to_numpy from ...geometry import cont6d_to_matrix, matrix_to_cont6d from ...skeleton.kinematics import fk from ...skeleton.transforms import global_rots_to_local_rots from ...tools import ensure_batched from ..conditioning import get_unique_index_and_data from ..feature_utils import RotateFeatures, compute_heading_angle, compute_vel_xyz from ..feet import foot_detect_from_pos_and_vel from ..smooth_root import get_smooth_root_pos from .base import MotionRepBase class KimodoMotionRep(MotionRepBase): """Global root / global joints rotations representation, relative to a smooth root.""" def __init__( self, skeleton, fps, stats_path: Optional[str] = None, ): nbjoints = skeleton.nbjoints self.size_dict = { "smooth_root_pos": torch.Size([3]), "global_root_heading": torch.Size([2]), "local_joints_positions": torch.Size([nbjoints, 3]), "global_rot_data": torch.Size([nbjoints, 6]), "velocities": torch.Size([nbjoints, 3]), "foot_contacts": torch.Size([4]), } self.last_root_feature = "global_root_heading" self.local_root_size_dict = { "local_root_rot_vel": torch.Size([1]), "local_root_vel": torch.Size([2]), "global_root_y": torch.Size([1]), } super().__init__(skeleton, fps, stats_path) @ensure_batched(local_joint_rots=5, root_positions=3, lengths=1) def __call__( self, local_joint_rots: torch.Tensor, root_positions: torch.Tensor, to_normalize: bool, to_canonicalize: bool = False, lengths: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Convert local rotations and root trajectory into smooth-root features. Args: local_joint_rots: Local joint rotation matrices ``[B, T, J, 3, 3]``. root_positions: Root positions ``[B, T, 3]``. to_normalize: Whether to normalize output features. to_canonicalize: Whether to canonicalize output features (False by default). lengths: Optional valid lengths for variable-length batches. Returns: Motion features with shape ``[B, T, motion_rep_dim]``. """ device = local_joint_rots.device if lengths is None: assert local_joint_rots.shape[0] == 1, "If lengths is not provided, the input should not be batched."
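# Packed layout per frame (J = nbjoints): 3 (smooth_root_pos) + 2 (global_root_heading)
# + 3J (local_joints_positions) + 6J (global_rot_data) + 3J (velocities)
# + 4 (foot_contacts) = motion_rep_dim.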
lengths = torch.tensor([local_joint_rots.shape[1]], device=device) ( global_joints_rots, global_joints_positions, local_joints_positions_origin_is_pelvis, ) = fk(local_joint_rots, root_positions, self.skeleton) root_heading_angle = compute_heading_angle(global_joints_positions, self.skeleton) global_root_heading = torch.stack([torch.cos(root_heading_angle), torch.sin(root_heading_angle)], dim=-1) smooth_root_pos = get_smooth_root_pos(root_positions) hips_offset = root_positions - smooth_root_pos hips_offset[..., 1] = root_positions[..., 1] # keep the absolute root height in the local offsets local_joints_positions = local_joints_positions_origin_is_pelvis + hips_offset[:, :, None] velocities = compute_vel_xyz(global_joints_positions, self.fps, lengths=lengths) foot_contacts = foot_detect_from_pos_and_vel(global_joints_positions, velocities, self.skeleton, 0.15, 0.10) global_rot_data = matrix_to_cont6d(global_joints_rots) features, _ = einops.pack( [ smooth_root_pos, global_root_heading, local_joints_positions, global_rot_data, velocities, foot_contacts, ], "batch time *", ) if to_canonicalize: features = self.canonicalize(features, normalized=False) if to_normalize: features = self.normalize(features) return features @ensure_batched(features=3, angle=1) def rotate(self, features: torch.Tensor, angle: torch.Tensor): """Rotate root/joint positional and rotational features by heading.""" # assume it is not normalized bs = features.shape[0] device = features.device [ smooth_root_pos, global_root_heading, local_joints_positions, global_rot_data, velocities, foot_contacts, ] = einops.unpack(features, self.ps, "batch time *") if not isinstance(angle, torch.Tensor): angle = torch.tensor(angle, device=device) if len(angle.shape) == 0: angle = angle.repeat(bs) RF = RotateFeatures(angle) new_features, _ = einops.pack( [ RF.rotate_positions(smooth_root_pos), RF.rotate_2d_positions(global_root_heading), RF.rotate_positions(local_joints_positions), RF.rotate_6d_rotations(global_rot_data), RF.rotate_positions(velocities), foot_contacts, ], "batch time *", ) return new_features @ensure_batched(features=3, translation_2d=2) def translate_2d( self, features: torch.Tensor, translation_2d: torch.Tensor, ) -> torch.Tensor: """Translate smooth root planar position by ``(dx, dz)``.""" # only move on the ground # If we need a translate_3D function, we should not forget to move the local_joints_positions as well bs = features.shape[0] if len(translation_2d.shape) == 1: translation_2d = translation_2d.repeat(bs, 1) new_features = features.clone() new_smooth_root_pos = new_features[:, :, self.slice_dict["smooth_root_pos"]] new_smooth_root_pos[:, :, 0] += translation_2d[:, [0]] new_smooth_root_pos[:, :, 2] += translation_2d[:, [1]] return new_features @ensure_batched(features=3) def inverse( self, features: torch.Tensor, is_normalized: bool, posed_joints_from="rotations", return_numpy: bool = False, ) -> dict: """Decode smooth-root features into motion tensors.""" assert posed_joints_from in [ "rotations", "positions", ], "posed_joints_from should be 'rotations' or 'positions'" if is_normalized: features = self.unnormalize(features) [ smooth_root_pos, global_root_heading, local_joints_positions, global_rot_data, velocities, foot_contacts, ] = einops.unpack(features, self.ps, "batch time *") global_rot_mats = cont6d_to_matrix(global_rot_data) local_rot_mats = global_rots_to_local_rots(global_rot_mats, self.skeleton) posed_joints_from_pos = local_joints_positions.clone() posed_joints_from_pos[..., 0] += smooth_root_pos[..., None, 0] posed_joints_from_pos[..., 2] +=
smooth_root_pos[..., None, 2] root_positions = posed_joints_from_pos[..., self.skeleton.root_idx, :] foot_contacts = foot_contacts > 0.5 if posed_joints_from == "rotations": _, posed_joints, _ = self.skeleton.fk( local_rot_mats, root_positions, ) else: posed_joints = posed_joints_from_pos output_tensor_dict = { "local_rot_mats": local_rot_mats, "global_rot_mats": global_rot_mats, "posed_joints": posed_joints, "root_positions": root_positions, "smooth_root_pos": smooth_root_pos, "foot_contacts": foot_contacts, "global_root_heading": global_root_heading, } if return_numpy: return to_numpy(output_tensor_dict) return output_tensor_dict def create_conditions( self, index_dict: dict[Tensor], data_dict: dict[Tensor], length: int, to_normalize: bool, device: str, ): """Build sparse conditioning tensors for smooth-root representation.""" # create empty features and mask to be filled in observed_motion = torch.zeros(length, self.motion_rep_dim, device=device) motion_mask = torch.zeros(length, self.motion_rep_dim, dtype=bool, device=device) def _cat_indices(indices_list: list[Tensor]) -> Tensor: indices = torch.cat([torch.tensor(x) if not isinstance(x, Tensor) else x for x in indices_list]) return indices.to(device=device, dtype=torch.long) def _match_obs_dtype(tensor: Tensor) -> Tensor: return tensor.to(device=device, dtype=observed_motion.dtype) if (fname := "smooth_root_2d") in index_dict and index_dict[fname]: indices = _cat_indices(index_dict[fname]) indices, smooth_root_2d = get_unique_index_and_data(indices, torch.cat(data_dict[fname])) smooth_root_2d = _match_obs_dtype(smooth_root_2d) f_sliced = observed_motion[:, self.slice_dict["smooth_root_pos"]] f_sliced[indices, 0] = smooth_root_2d[:, 0] f_sliced[indices, 2] = smooth_root_2d[:, 1] m_sliced = motion_mask[:, self.slice_dict["smooth_root_pos"]] m_sliced[indices, 0] = True m_sliced[indices, 2] = True if (fname := "root_y_pos") in index_dict and index_dict[fname]: indices = _cat_indices(index_dict[fname]) indices, root_pos_Y = get_unique_index_and_data(indices, torch.cat(data_dict[fname])) root_pos_Y = _match_obs_dtype(root_pos_Y) f_sliced = observed_motion[:, self.slice_dict["smooth_root_pos"]] f_sliced[indices, 1] = root_pos_Y m_sliced = motion_mask[:, self.slice_dict["smooth_root_pos"]] m_sliced[indices, 1] = True if (fname := "global_root_heading") in index_dict and index_dict[fname]: indices = _cat_indices(index_dict[fname]) indices, global_root_heading = get_unique_index_and_data(indices, torch.cat(data_dict[fname])) global_root_heading = _match_obs_dtype(global_root_heading) f_sliced = observed_motion[:, self.slice_dict[fname]] f_sliced[indices] = global_root_heading m_sliced = motion_mask[:, self.slice_dict[fname]] m_sliced[indices] = True if (fname := "global_joints_rots") in index_dict and index_dict[fname]: indices_lst = _cat_indices(index_dict[fname]) indices_lst, global_joints_rots = get_unique_index_and_data(indices_lst, torch.cat(data_dict[fname])) global_joints_rots = _match_obs_dtype(global_joints_rots) global_rot_data = matrix_to_cont6d(global_joints_rots) f_sliced = observed_motion[:, self.slice_dict["global_rot_data"]] masking = torch.zeros(len(f_sliced) * self.nbjoints, 6, device=device, dtype=bool) masking[indices_lst.T[0] * self.nbjoints + indices_lst.T[1]] = True masking = masking.reshape(len(f_sliced), self.nbjoints * 6) f_sliced[masking] = global_rot_data.flatten() m_sliced = motion_mask[:, self.slice_dict["global_rot_data"]] m_sliced[masking] = True if (fname := "global_joints_positions") in index_dict and 
index_dict[fname]: indices_lst = _cat_indices(index_dict[fname]) indices_lst, global_joints_positions = get_unique_index_and_data(indices_lst, torch.cat(data_dict[fname])) global_joints_positions = _match_obs_dtype(global_joints_positions) T_indices = indices_lst[:, 0].contiguous() _test = motion_mask[T_indices, self.slice_dict["smooth_root_pos"]] if not _test[:, [0, 2]].all(): raise ValueError("For constraining global positions, the smooth root should also be constrained.") smooth_root_pos = observed_motion[T_indices, self.slice_dict["smooth_root_pos"]].clone() local_reference = smooth_root_pos.clone() local_reference[..., 1] = 0.0 local_joints_positions = global_joints_positions - local_reference f_sliced = observed_motion[:, self.slice_dict["local_joints_positions"]] masking = torch.zeros(len(f_sliced) * self.nbjoints, 3, device=device, dtype=bool) masking[indices_lst.T[0] * self.nbjoints + indices_lst.T[1]] = True masking = masking.reshape(len(f_sliced), self.nbjoints * 3) f_sliced[masking] = local_joints_positions.flatten() m_sliced = motion_mask[:, self.slice_dict["local_joints_positions"]] m_sliced[masking] = True if to_normalize: observed_motion = self.normalize(observed_motion) return observed_motion, motion_mask ================================================ FILE: kimodo/motion_rep/reps/tmr_motionrep.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """TMR motion representation: global root, global joints, velocities, and foot contacts.""" from typing import Optional import einops import torch from ...skeleton.kinematics import fk from ...tools import ensure_batched, to_numpy from ..feature_utils import RotateFeatures, compute_heading_angle, compute_vel_xyz from ..feet import foot_detect_from_pos_and_vel from .base import MotionRepBase class TMRMotionRep(MotionRepBase): """Motion representation with global root and local joint positions. The local joint positions are rotation invariant (they all face z+) Feature layout: - root position ``(x, y, z)`` - root heading as ``(cos(theta), sin(theta))`` - local joint positions (root and rotation removed) - local joint velocities (rotation removed) - binary foot contacts """ def __init__( self, skeleton, fps, stats_path: Optional[str] = None, ): nbjoints = skeleton.nbjoints self.size_dict = { "root_pos": torch.Size([3]), "global_root_heading": torch.Size([2]), "local_joints_positions": torch.Size([nbjoints - 1, 3]), "velocities": torch.Size([nbjoints, 3]), "foot_contacts": torch.Size([4]), } self.last_root_feature = "global_root_heading" self.local_root_size_dict = { "local_root_rot_vel": torch.Size([1]), "local_root_vel": torch.Size([2]), "global_root_y": torch.Size([1]), } super().__init__(skeleton, fps, stats_path) @ensure_batched(local_joint_rots=5, root_positions=3, posed_joints=4, lengths=1) def __call__( self, local_joint_rots: Optional[torch.Tensor] = None, root_positions: Optional[torch.Tensor] = None, posed_joints: Optional[torch.Tensor] = None, *, to_normalize: bool, to_canonicalize: bool = False, lengths: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Convert motion inputs to this feature representation. Args: local_joint_rots: Local joint rotation matrices ``[B, T, J, 3, 3]``. Required when ``posed_joints`` is not provided. root_positions: Root translations ``[B, T, 3]``. Required when ``posed_joints`` is not provided. 
posed_joints: Optional precomputed global joint positions ``[B, T, J, 3]``. If passed, FK is skipped. to_normalize: Whether to normalize output features. to_canonicalize: Whether to canonicalize output features (False by default). lengths: Optional valid lengths for variable-length batches. Returns: Motion features with shape ``[B, T, motion_rep_dim]``. """ if posed_joints is not None: device = posed_joints.device nbatch, nbframes, nbjoints = posed_joints.shape[:3] else: device = local_joint_rots.device nbatch, nbframes, nbjoints = local_joint_rots.shape[:3] if lengths is None: assert nbatch == 1, "If lengths is not provided, the input should not be batched." lengths = torch.tensor([nbframes], device=device) if posed_joints is None: _, global_positions, local_joints_positions_origin_is_pelvis = fk( local_joint_rots, root_positions, self.skeleton ) else: global_positions = posed_joints root_positions = posed_joints[:, :, 0] local_joints_positions_origin_is_pelvis = posed_joints - root_positions[:, :, None] root_heading_angle = compute_heading_angle(global_positions, self.skeleton) global_root_heading = torch.stack([torch.cos(root_heading_angle), torch.sin(root_heading_angle)], dim=-1) ground_offset = torch.zeros_like(root_positions) ground_offset[..., 1] = root_positions[..., 1] local_joints_positions = local_joints_positions_origin_is_pelvis[:, :, 1:] + ground_offset[:, :, None] velocities = compute_vel_xyz(global_positions, self.fps, lengths=lengths) # Remove the heading angle for each frame RF = RotateFeatures(-root_heading_angle) local_joints_positions = RF.rotate_positions(local_joints_positions) velocities = RF.rotate_positions(velocities) foot_contacts = foot_detect_from_pos_and_vel(global_positions, velocities, self.skeleton, 0.15, 0.10) features, _ = einops.pack( [ root_positions, global_root_heading, local_joints_positions, velocities, foot_contacts, ], "batch time *", ) if to_canonicalize: features = self.canonicalize(features, normalized=False) if to_normalize: features = self.normalize(features) return features @ensure_batched(features=3, angle=1) def rotate(self, features: torch.Tensor, angle: torch.Tensor): """Rotate all spatial features by a heading delta (radians).""" # rotate by the angle # it adds the angle to the current features # assume it is not normalized bs = features.shape[0] device = features.device [ root_pos, global_root_heading, local_joints_positions, velocities, foot_contacts, ] = einops.unpack(features, self.ps, "batch time *") if not isinstance(angle, torch.Tensor): angle = torch.tensor(angle, device=device) if len(angle.shape) == 0: angle = angle.repeat(bs) RF = RotateFeatures(angle) new_features, _ = einops.pack( [ RF.rotate_positions(root_pos), RF.rotate_2d_positions(global_root_heading), local_joints_positions, # already rotation invariant velocities, # already rotation invariant foot_contacts, ], "batch time *", ) return new_features @ensure_batched(features=3, translation_2d=2) def translate_2d( self, features: torch.Tensor, translation_2d: torch.Tensor, ) -> torch.Tensor: """Translate root planar position by ``(dx, dz)``.""" # only move on the ground # For 3D, we should not forget to move the local_joints_positions as well bs = features.shape[0] if len(translation_2d.shape) == 1: translation_2d = translation_2d.repeat(bs, 1) new_features = features.clone() new_root_pos = new_features[:, :, self.slice_dict["root_pos"]] new_root_pos[:, :, 0] += translation_2d[:, 0] new_root_pos[:, :, 2] += translation_2d[:, 1] return new_features @ensure_batched(features=3) def
inverse( self, features: torch.Tensor, is_normalized: bool, posed_joints_from="positions", return_numpy: bool = False, ) -> dict: """Decode features back to a motion dictionary. Args: features: Feature tensor ``[B, T, D]``. is_normalized: Whether input features are normalized. posed_joints_from: Must be ``"positions"`` for this representation. return_numpy: Whether to convert tensors to numpy arrays. Returns: Dictionary containing reconstructed positions and auxiliary data. """ assert posed_joints_from == "positions" if is_normalized: features = self.unnormalize(features) [ root_positions, global_root_heading, local_joints_positions, velocities, foot_contacts, ] = einops.unpack(features, self.ps, "batch time *") dummy_root = torch.zeros_like(local_joints_positions[:, :, [0]]) posed_joints_from_pos = torch.cat([dummy_root, local_joints_positions], axis=2) # prepend the root joint posed_joints_from_pos[..., 0] += root_positions[..., None, 0] posed_joints_from_pos[..., 2] += root_positions[..., None, 2] root_positions = posed_joints_from_pos[..., self.skeleton.root_idx, :] foot_contacts = foot_contacts > 0.5 posed_joints = posed_joints_from_pos output_tensor_dict = { "local_rot_mats": None, "global_rot_mats": None, "posed_joints": posed_joints, "root_positions": root_positions, "foot_contacts": foot_contacts, "global_root_heading": global_root_heading, } if return_numpy: return to_numpy(output_tensor_dict) return output_tensor_dict ================================================ FILE: kimodo/motion_rep/smooth_root.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Smooth root trajectory: ADMM-based smoother with margin constraints and get_smooth_root_pos helper.""" import math import numpy as np import torch from scipy import sparse from scipy.sparse.linalg import splu from kimodo.tools import ensure_batched class TrajectorySmoother: """Modify trajectories to hit target values while respecting soft constraints. This smoother keeps the trajectory close to the original positions while minimizing accelerations. Targets are enforced at specified frames via soft constraints. """ def __init__( self, margins, pos_weight=0.0, loop=False, admm_iters=100, alpha_overrelax=1.0, circle_project=False, ): """Initialize the TrajectorySmoother. Args: margins: Array of margin values for each frame.
margins[i] < 0: unconstrained margins[i] == 0: pinned on this frame margins[i] > 0: can deviate within the margin pos_weight: Weight for position preservation loop: Whether the trajectory should loop admm_iters: Number of ADMM iterations alpha_overrelax: ADMM over-relaxation coefficient circle_project: Whether to project each solution vector toward the unit circle """ self.pos_weight = pos_weight self.admm_iters = admm_iters self.alpha_overrelax = alpha_overrelax self.circle_project = circle_project N = len(margins) # Store margin information as numpy arrays self.margin_vals = margins # Build acceleration matrix A a_data = [] a_rows = [] a_cols = [] for i in range(1, N - 1): scale = 1.0 a_data.extend([-scale, 2.0 * scale, -scale]) a_rows.extend([i, i, i]) a_cols.extend([i - 1, i, i + 1]) if loop: # Add periodic accelerations scale = 1.0 a_data.extend([-scale, 2.0 * scale, -scale]) a_rows.extend([0, 0, 0]) a_cols.extend([N - 1, 0, 1]) scale = 1.0 a_data.extend([-scale, 2.0 * scale, -scale]) a_rows.extend([N - 1, N - 1, N - 1]) a_cols.extend([N - 2, N - 1, 0]) A = sparse.csr_matrix((a_data, (a_rows, a_cols)), shape=(N, N)) # Build identity matrix identity_matrix = sparse.eye(N) # Build system matrix M M = pos_weight * identity_matrix + A.T @ A # Calculate ADMM step size diag_max = max(abs(M.diagonal())) self.admm_stepsize = 0.25 * np.sqrt(diag_max) M = M + self.admm_stepsize * identity_matrix self.system_lu = splu(M.tocsc()) def smooth(self, targets, x0): """Interpolate between reference positions while satisfying constraints. Args: targets: Target positions for constrained frames (numpy array) x0: Initial guess defining the original trajectory shape (numpy array) Returns: Interpolated positions (numpy array) """ x_target = targets.copy() x = x0.copy() z = np.zeros_like(x) u = np.zeros_like(x) for _ in range(self.admm_iters): self.z_update(z, x, x_target, u) self.u_update(u, x, z) self.x_update(x, z, u, x_target) return x def x_update(self, x, z, u, x_t): """Update x in the ADMM iteration.""" # x = (wp * I + A^T A + p I)^-1 (wp * x_orig + p (z - u)) r = self.pos_weight * x_t + self.admm_stepsize * (z - u) x[:] = self.system_lu.solve(r) def z_update(self, z, x, z_t, u): """Update z in the ADMM iteration using vectorized operations.""" # Compute the difference from target for all margin locations at once z[:] = x + u - z_t # Check if we need to project back to margin z_diff_norms = np.linalg.norm(z, axis=1) mask = z_diff_norms > self.margin_vals if np.any(mask): scale_factors = self.margin_vals[mask] / z_diff_norms[mask] z[mask] *= scale_factors[:, np.newaxis] # Add back the target z[:] += z_t if self.circle_project: z[:] = z / (np.linalg.norm(z, axis=1, keepdims=True) + 1.0e-6) def u_update(self, u, x, z): """Update u in the ADMM iteration using vectorized operations.""" u[:] += self.alpha_overrelax * (x - z) def smooth_signal(x, margins, pos_weight=0, alpha_overrelax=1.8, admm_iters=500, circle_project=False): """Multigrid trajectory smoothing with margin constraints. Args: x: Input trajectory ``[T, D]`` as a NumPy array. margins: Allowed radius around each target frame ``[T]``. pos_weight: Weight for staying close to the original signal. alpha_overrelax: ADMM over-relaxation coefficient. admm_iters: ADMM iterations per multigrid level. circle_project: If ``True``, project each vector to the unit sphere. Returns: Smoothed trajectory of shape ``[T, D]``. """ x_smoothed = x.copy() x_smoothed[:] = x.mean(axis=0, keepdims=True) # smooth the signal, multigrid style by starting out coarse, # doubling the resolution and repeating until we're at the full # resolution, using the previous result as the initial guess.
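# Illustrative schedule: for a 300-frame trajectory, floor(log2(300)) = 8, so
# levels = max(8 - 4, 1) = 4 and stepsize starts at 2**4 = 16, halving each pass:
# 16, 8, 4, 2, 1 (the final pass smooths at full resolution).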
levels = int(math.floor(math.log2(len(x)))) levels = max(levels - 4, 1) stepsize = 2**levels while True: # smooth signals at this level: num_steps = len(x_smoothed[::stepsize]) smoother = TrajectorySmoother( margins=margins[::stepsize], pos_weight=pos_weight, alpha_overrelax=alpha_overrelax, admm_iters=admm_iters, circle_project=circle_project, ) x_smoothed[::stepsize] = smoother.smooth(x[::stepsize], x_smoothed[::stepsize]) # interpolate to next level: next_stepsize = stepsize // 2 num_interleaved = len(x_smoothed[next_stepsize::stepsize]) if num_interleaved == num_steps: # linearly extrapolate the last value if we have to: x_smoothed[next_stepsize::stepsize][-1] = ( x_smoothed[::stepsize][-1] + (x_smoothed[::stepsize][-1] - x_smoothed[::stepsize][-2]) / 2 ) num_interleaved = num_interleaved - 1 # linearly interpolate the remaining values: x_smoothed[next_stepsize::stepsize][:num_interleaved] = ( x_smoothed[::stepsize][:-1] + x_smoothed[::stepsize][1:] ) / 2 if stepsize == 1: break stepsize //= 2 return x_smoothed @ensure_batched(hip_translations=3) def get_smooth_root_pos(hip_translations): """Smooth root trajectory in the ground plane while preserving height. Args: hip_translations: Root translations ``[B, T, 3]``. Returns: Smoothed root translations ``[B, T, 3]`` where ``x/z`` are smoothed and ``y`` remains unchanged. """ root_translations_xz = hip_translations[..., [0, 2]] root_translations_y = hip_translations[..., [1]] batch_size, nframes = root_translations_xz.shape[:2] margins = np.full(root_translations_xz.shape[1], 0.06) root_translations_smoothed_xz = [] for batch in range(batch_size): root_translations_smoothed_xz.append( smooth_signal(root_translations_xz[batch].detach().cpu().numpy(), margins)[None] ) root_translations_smoothed_xz = torch.tensor(np.concatenate(root_translations_smoothed_xz)) root_translations = torch.cat( [ root_translations_smoothed_xz.to(root_translations_y.device), root_translations_y, ], dim=-1, )[..., [0, 2, 1]] return root_translations ================================================ FILE: kimodo/motion_rep/stats.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Feature normalization statistics (mean/std) for motion representations.""" import logging import os from typing import Optional import numpy as np import torch log = logging.getLogger(__name__) class Stats(torch.nn.Module): """Utility module for feature normalization statistics. Normalization follows: ``(data - mean) / sqrt(std**2 + eps)`` """ def __init__( self, folder: Optional[str] = None, load: bool = True, eps=1e-05, ): super().__init__() self.folder = folder self.eps = eps if folder is not None and load: self.load() def sliced(self, indices): """Return a new ``Stats`` object containing selected feature indices.""" new_stats = Stats(folder=self.folder, load=False, eps=self.eps) new_stats.register_from_tensors( self.mean[..., indices].clone(), self.std[..., indices].clone(), ) return new_stats def load(self): """Load ``mean.npy`` and ``std.npy`` from ``self.folder``.""" mean_path = os.path.join(self.folder, "mean.npy") std_path = os.path.join(self.folder, "std.npy") if not os.path.exists(mean_path) or not os.path.exists(std_path): raise FileNotFoundError( f"Missing stats files in '{self.folder}'. 
Expected:\n" f" - {mean_path}\n" f" - {std_path}\n\n" "Make sure the checkpoint/stats have been downloaded and are mounted into the container.\n" "If you're using Docker Compose, run it from the repo root so `./:/workspace` mounts the correct directory." ) mean = torch.from_numpy(np.load(mean_path)) std = torch.from_numpy(np.load(std_path)) self.register_from_tensors(mean, std) def register_from_tensors(self, mean: torch.Tensor, std: torch.Tensor): """Register mean/std tensors as non-persistent buffers.""" self.register_buffer("mean", mean, persistent=False) self.register_buffer("std", std, persistent=False) def normalize(self, data: torch.Tensor) -> torch.Tensor: """Normalize data using the stored statistics.""" mean = self.mean.to(device=data.device, dtype=data.dtype) std = self.std.to(device=data.device, dtype=data.dtype) # adjust std with eps return (data - mean) / torch.sqrt(std**2 + self.eps) def unnormalize(self, data: torch.Tensor) -> torch.Tensor: """Undo normalization using the stored statistics.""" mean = self.mean.to(device=data.device, dtype=data.dtype) std = self.std.to(device=data.device, dtype=data.dtype) # adjust std with eps return data * torch.sqrt(std**2 + self.eps) + mean def is_loaded(self): """Return whether statistics are currently available.""" return hasattr(self, "mean") def get_dim(self): """Return feature dimensionality.""" return self.mean.shape[0] def save( self, folder: Optional[str] = None, mean: Optional[torch.Tensor] = None, std: Optional[torch.Tensor] = None, ): """Save statistics to ``folder`` as ``mean.npy`` and ``std.npy``.""" if folder is None: folder = self.folder if folder is None: raise ValueError("No folder to save stats") if mean is None and std is None: try: mean = self.mean.cpu().numpy() std = self.std.cpu().numpy() except AttributeError: raise ValueError("Stats were not loaded") # don't override stats folder os.makedirs(folder, exist_ok=False) np.save(os.path.join(folder, "mean.npy"), mean) np.save(os.path.join(folder, "std.npy"), std) def __eq__(self, other): return (self.mean.cpu() == other.mean.cpu()).all() and (self.std.cpu() == other.std.cpu()).all() # should define a hash value for pytorch, as we defined __eq__ def __hash__(self): # Convert mean and std to bytes for a consistent hash value mean_hash = hash(self.mean.detach().cpu().numpy().tobytes()) std_hash = hash(self.std.detach().cpu().numpy().tobytes()) return hash((mean_hash, std_hash)) def __repr__(self): return f'Stats(folder="{self.folder}")' ================================================ FILE: kimodo/postprocess.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Post-processing utilities for motion generation output.""" from types import SimpleNamespace from typing import Dict, List, Optional, Tuple import numpy as np import torch from .constraints import ( EndEffectorConstraintSet, FullBodyConstraintSet, Root2DConstraintSet, ) from .geometry import matrix_to_quaternion, quaternion_to_matrix from .skeleton import ( G1Skeleton34, SkeletonBase, SMPLXSkeleton22, SOMASkeleton30, SOMASkeleton77, fk, ) def extract_input_motion_from_constraints( constraint_lst: List, skeleton: SkeletonBase, num_frames: int, num_joints: int, ) -> Tuple[torch.Tensor, torch.Tensor]: """Extract hip translations and local rotations from constraints for postprocessing. Args: constraint_lst: List of constraints (FullBodyConstraintSet, EndEffectorConstraintSet, etc.) 
        skeleton: Skeleton instance
        num_frames: Total number of frames in the motion
        num_joints: Number of joints

    Returns:
        Tuple of (hip_translations_input, rotations_input):
            - hip_translations_input: Hip translations, shape (T, 3)
            - rotations_input: Local joint rotations as quaternions, shape (T, J, 4)
    """
    # Initialize with zeros for all frames
    hip_translations_input = torch.zeros(num_frames, 3)
    rotations_input = torch.zeros(num_frames, num_joints, 4)
    rotations_input[..., 0] = 1.0  # Initialize as identity quaternions (w=1, x=y=z=0)

    def _match_hip_dtype(tensor: torch.Tensor) -> torch.Tensor:
        return tensor.to(device=hip_translations_input.device, dtype=hip_translations_input.dtype)

    def _match_rot_dtype(tensor: torch.Tensor) -> torch.Tensor:
        return tensor.to(device=rotations_input.device, dtype=rotations_input.dtype)

    if not constraint_lst:
        return hip_translations_input, rotations_input

    # Sort constraints to ensure FullBodyConstraintSet is processed last.
    # This ensures it gets the last say on whether hip translations need to be the exact root or the smoothed root.
    sorted_constraints = sorted(constraint_lst, key=lambda c: isinstance(c, FullBodyConstraintSet))

    for constraint in sorted_constraints:
        frame_indices = constraint.frame_indices
        if isinstance(frame_indices, torch.Tensor):
            valid_mask = frame_indices < num_frames
            if valid_mask.sum() == 0:
                continue
            frame_indices = frame_indices[valid_mask]
        else:
            valid_positions = [i for i, idx in enumerate(frame_indices) if idx < num_frames]
            if not valid_positions:
                continue
            frame_indices = [frame_indices[i] for i in valid_positions]

        # Handle Root2DConstraintSet separately - only assign smooth_root_2d at the xz dimensions
        if isinstance(constraint, Root2DConstraintSet):
            smooth_root_2d = constraint.smooth_root_2d  # (K, 2) where K = len(frame_indices)
            if isinstance(frame_indices, torch.Tensor):
                smooth_root_2d = smooth_root_2d[valid_mask]
            else:
                smooth_root_2d = smooth_root_2d[valid_positions]
            smooth_root_2d = _match_hip_dtype(smooth_root_2d)
            hip_translations_input[frame_indices, 0] = smooth_root_2d[:, 0]  # x
            hip_translations_input[frame_indices, 2] = smooth_root_2d[:, 1]  # z
            continue
        elif isinstance(constraint, FullBodyConstraintSet) or isinstance(constraint, EndEffectorConstraintSet):
            global_rots = constraint.global_joints_rots  # (K, J, 3, 3) where K = len(frame_indices)
            global_positions = constraint.global_joints_positions  # (K, J, 3)
            if isinstance(frame_indices, torch.Tensor):
                global_rots = global_rots[valid_mask]
                global_positions = global_positions[valid_mask]
                smooth_root_2d = constraint.smooth_root_2d[valid_mask]
            else:
                global_rots = global_rots[valid_positions]
                global_positions = global_positions[valid_positions]
                smooth_root_2d = constraint.smooth_root_2d[valid_positions]

            root_positions = global_positions[:, skeleton.root_idx]  # (K, 3)
            # replace xz with smooth_root_2d values for EE constraints that do not include Hips,
            # since the hips themselves are not actually constrained in the model conditioning
            if isinstance(constraint, EndEffectorConstraintSet) and "Hips" not in constraint.joint_names:
                root_positions[:, 0] = smooth_root_2d[:, 0]  # x
                root_positions[:, 2] = smooth_root_2d[:, 1]  # z

            local_rot_mats = skeleton.global_rots_to_local_rots(global_rots)  # (K, J, 3, 3)
            local_rot_quats = matrix_to_quaternion(local_rot_mats)  # (K, J, 4)
            hip_translations_input[frame_indices] = _match_hip_dtype(root_positions)
            rotations_input[frame_indices] = _match_rot_dtype(local_rot_quats)
        else:
            raise NotImplementedError(f"Constraint {constraint.name} is not supported")

    return hip_translations_input,
rotations_input def create_working_rig_from_skeleton( skeleton: SkeletonBase, above_ground_offset: float = 0.007 ) -> List[SimpleNamespace]: """Create the working rig as a list of SimpleNamespace objects from skeleton. Args: skeleton: SkeletonBase instance with bone_order_names, neutral_joints, joint_parents above_ground_offset: Additional offset to position the rig slightly above ground Returns: List of SimpleNamespace objects representing the working rig """ working_rig_joints = [] joint_names = skeleton.bone_order_names neutral_positions = skeleton.neutral_joints.cpu().numpy() parent_indices = skeleton.joint_parents.cpu().numpy() if isinstance(skeleton, (G1Skeleton34, SMPLXSkeleton22)): retarget_map = { skeleton.bone_order_names[skeleton.root_idx]: "Hips", skeleton.left_hand_joint_names[0]: "LeftHand", skeleton.right_hand_joint_names[0]: "RightHand", skeleton.left_foot_joint_names[0]: "LeftFoot", skeleton.right_foot_joint_names[0]: "RightFoot", } else: # works for SOMA retarget_map = { "Hips": "Hips", "Head": "Head", "LeftHand": "LeftHand", "RightHand": "RightHand", "LeftFoot": "LeftFoot", "RightFoot": "RightFoot", } for i, joint_name in enumerate(joint_names): parent_name = None if parent_indices[i] == -1 else joint_names[parent_indices[i]] # Calculate local translation relative to parent if parent_indices[i] == -1: # Move the rig so that the lowest point (toe) is at ground level (y=0), # plus a small offset to position the rig slightly above ground toe_height = neutral_positions[:, 1].min() # lowest y-coordinate (toe) local_translation = ( neutral_positions[i] + np.array([0.0, -toe_height + above_ground_offset, 0.0]) ).tolist() else: parent_idx = parent_indices[i] parent_position = neutral_positions[parent_idx] joint_position = neutral_positions[i] local_translation = (joint_position - parent_position).tolist() # Default rotation (identity quaternion: x=0, y=0, z=0, w=1) default_rotation = [0.0, 0.0, 0.0, 1.0] joint_info = SimpleNamespace( name=joint_name, parent=parent_name, t_pose_rotation=default_rotation, t_pose_translation=local_translation, retarget_tag=retarget_map.get(joint_name), ) working_rig_joints.append(joint_info) return working_rig_joints def post_process_motion( local_rot_mats: torch.Tensor, root_positions: torch.Tensor, contacts: torch.Tensor, skeleton: SkeletonBase, constraint_lst: Optional[List] = None, contact_threshold: float = 0.5, root_margin: float = 0.04, ) -> Dict[str, torch.Tensor]: """Post-process generated motion to reduce foot skating and improve quality. Args: local_rot_mats: Local joint rotation matrices, shape (B, T, J, 3, 3) root_positions: Root joint positions, shape (B, T, 3) contacts: Foot contact labels, shape (B, T, num_contacts) skeleton: Skeleton instance constraint_lst: Optional list of constraints (or list of lists of constraints for batched inference)(FullBodyConstraintSet, etc.) 
contact_threshold: Threshold for foot contact detection root_margin: Margin for root position correction Returns: Dictionary with corrected motion data: - local_rot_mats: Corrected local rotation matrices (B, T, J, 3, 3) - root_positions: Corrected root positions (B, T, 3) - posed_joints: Corrected global joint positions (B, T, J, 3) - global_rot_mats: Corrected global rotation matrices (B, T, J, 3, 3) """ # Ensure batch dimension assert local_rot_mats.dim() == 5, "local_rot_mats should be 5D, make sure to include the batch dimension" batch_size, num_frames, num_joints = local_rot_mats.shape[:3] def _build_constraint_masks_dict(constraints: List) -> Dict[str, torch.Tensor]: out = { key: torch.zeros(num_frames, dtype=torch.float32) for key in [ "FullBody", "LeftFoot", "RightFoot", "LeftHand", "RightHand", "Root", ] } for constraint in constraints: frame_indices = constraint.frame_indices if isinstance(frame_indices, torch.Tensor): frame_indices = frame_indices[frame_indices < num_frames] if frame_indices.numel() == 0: continue else: frame_indices = [idx for idx in frame_indices if idx < num_frames] if not frame_indices: continue if constraint.name == "fullbody": out["FullBody"][frame_indices] = 1.0 elif constraint.name == "left-foot": out["LeftFoot"][frame_indices] = 1.0 elif constraint.name == "right-foot": out["RightFoot"][frame_indices] = 1.0 elif constraint.name == "left-hand": out["LeftHand"][frame_indices] = 1.0 elif constraint.name == "right-hand": out["RightHand"][frame_indices] = 1.0 elif constraint.name == "root2d": out["Root"][frame_indices] = 1.0 return out # Create constraint masks from constraint_lst (one dict per batch item when batched) batched_constraints = bool(constraint_lst) and isinstance(constraint_lst[0], list) if batched_constraints: constraint_masks_dict_lst = [_build_constraint_masks_dict(constraint_lst[b]) for b in range(batch_size)] else: constraint_masks_dict = ( _build_constraint_masks_dict(constraint_lst) if constraint_lst else { key: torch.zeros(num_frames, dtype=torch.float32) for key in [ "FullBody", "LeftFoot", "RightFoot", "LeftHand", "RightHand", "Root", ] } ) # Create working rig above_ground_offset = 0.02 if isinstance(skeleton, (SOMASkeleton30, SOMASkeleton77)) else 0.007 # larger offset for SOMA since model tends to generate lower to the ground working_rig = create_working_rig_from_skeleton(skeleton, above_ground_offset=above_ground_offset) has_double_ankle_joints = isinstance(skeleton, G1Skeleton34) # Prepare input tensors. The generated motion will be modified in place. Clone first. 
neutral_joints_pelvis_offset = skeleton.neutral_joints[0].cpu().clone() hip_translations_corrected = root_positions.cpu().clone() rotations_corrected = matrix_to_quaternion(local_rot_mats).cpu().clone() # (B, T, J, 4) contacts = contacts.cpu() # Extract input motion (target keyframes) from constraints for each batch # For constrained keyframes, use the original motion from constraints # For non-constrained frames, zeros are used hip_translations_input = torch.zeros(batch_size, num_frames, 3) rotations_input = torch.zeros(batch_size, num_frames, num_joints, 4) rotations_input[..., 0] = 1.0 # Initialize as identity quaternions (w=1, x=y=z=0) if constraint_lst: for b in range(batch_size): # Get constraints for this batch item (if batched) or use the same list constraints_lst_el = ( constraint_lst[b] if isinstance( constraint_lst[0], list ) # when the constraint_list is in batch format, each item in a list is a constraintlist for one sample else constraint_lst # single constraint list shared for all samples in the batch ) hip_translations_input[b], rotations_input[b] = extract_input_motion_from_constraints( constraints_lst_el, skeleton, num_frames, num_joints, ) # Call the motion correction for each batch (optional package) try: from motion_correction import motion_postprocess except ImportError as e: raise RuntimeError( "Motion correction is required for this postprocessing path but the " "motion_correction package is not installed. Install with: pip install -e ." ) from e for b in range(batch_size): masks_b = constraint_masks_dict_lst[b] if batched_constraints else constraint_masks_dict motion_postprocess.correct_motion( hip_translations_corrected[b : b + 1], rotations_corrected[b : b + 1], contacts[b : b + 1], hip_translations_input[b : b + 1], rotations_input[b : b + 1], masks_b, contact_threshold, root_margin, working_rig, has_double_ankle_joints, ) local_rot_mats_corrected = quaternion_to_matrix(rotations_corrected) # Compute posed joints using FK device = local_rot_mats.device global_rot_mats, posed_joints, _ = fk( local_rot_mats_corrected.to(device), hip_translations_corrected.to(device), skeleton, ) result = { "local_rot_mats": local_rot_mats_corrected.to(device), "root_positions": hip_translations_corrected.to(device), "posed_joints": posed_joints, "global_rot_mats": global_rot_mats, } return result ================================================ FILE: kimodo/sanitize.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Text prompt sanitization for motion generation (whitespace, punctuation, capitalization).""" def sanitize_text(text: str, paragraph: bool = True) -> str: """Sanitize a text prompt: strip, collapse spaces, capitalize, trim non-alphanumeric, add/fix final punctuation. Args: text: Input text prompt. paragraph: If True, capitalize after each sentence break and normalize spacing between sentences. Returns: Sanitized text. 
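
    Example (illustrative, traced through the rules implemented below):
        >>> sanitize_text("  someone   go forward ")
        'Someone go forward.'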
""" # remove any trailing or leading whitespace text = text.strip() # https://stackoverflow.com/a/1546251 # replace duplicate spaces by one space text = " ".join(text.split()) if text == "": return text # removing leading non alpha numeric characters for i, c in enumerate(text): if not str.isalnum(c): continue break text = text[i:] # Capitalize text = text.capitalize() final_punctuations = ".!?\"])'" # removing trailing non alpha numeric characters # expect final punctuations for i, c in reversed(list(enumerate(text))): if not str.isalnum(c) and c not in final_punctuations: continue break text = text[: i + 1] # Adding period at the end if needed if text[-1] not in ".!?": text = text + "." if paragraph: # fix end of sentences if several sentences for sentence_break in ".!?": subtexts = text.split(sentence_break) text = f"{sentence_break} ".join( # put back a space after the break [ y[0].capitalize() + y[1:] # only capitalize the first character if y else y # y is empty at the end for x in subtexts for y in [x.strip()] # remove extra spaces ] ).strip() # remove extra space at the end return text def sanitize_texts(texts: list[str]) -> list[str]: """Sanitize each text prompt in the list (see sanitize_text). Args: texts: List of input text prompts. Returns: List of sanitized texts. """ return [sanitize_text(text) for text in texts] if __name__ == "__main__": texts = [ " A person is walking.", "someone go forward", "jump", "jumping!", "jumping)", "-go", "blocasdji -----", "", ] print("Old texts") print("\n".join(texts)) print() new_texts = sanitize_texts(texts) print("Sanitized texts") print("\n".join(new_texts)) ================================================ FILE: kimodo/scripts/__init__.py ================================================ ================================================ FILE: kimodo/scripts/docker-entrypoint.sh ================================================ #!/usr/bin/env bash set -euo pipefail HOST_UID="${HOST_UID:-}" HOST_GID="${HOST_GID:-}" HOST_USER="${HOST_USER:-user}" if [[ -z "${HOST_UID}" || -z "${HOST_GID}" ]]; then if [[ -d /workspace ]]; then HOST_UID="$(stat -c %u /workspace)" HOST_GID="$(stat -c %g /workspace)" else HOST_UID="${HOST_UID:-1000}" HOST_GID="${HOST_GID:-1000}" fi fi if ! getent group "${HOST_GID}" >/dev/null 2>&1; then groupadd -g "${HOST_GID}" "${HOST_USER}" fi if ! getent passwd "${HOST_UID}" >/dev/null 2>&1; then useradd -m -u "${HOST_UID}" -g "${HOST_GID}" -s /bin/bash "${HOST_USER}" fi exec gosu "${HOST_UID}:${HOST_GID}" "$@" ================================================ FILE: kimodo/scripts/generate.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 import argparse import os import shutil from typing import Any, Dict, Optional import torch from kimodo import DEFAULT_MODEL, load_model from kimodo.constraints import load_constraints_lst from kimodo.exports.motion_io import save_kimodo_npz from kimodo.meta import load_prompts_from_meta from kimodo.model.cfg import CFG_TYPES from kimodo.model.registry import get_model_info from kimodo.tools import load_json, save_json, seed_everything def parse_args(): parser = argparse.ArgumentParser(description="Cmd line API for generation motions with kimodo") parser.add_argument( "prompt", nargs="?", type=str, default=None, help="Text prompt describing the motion to generate, or several prompts separated by periods.", ) parser.add_argument( "--model", type=str, default=DEFAULT_MODEL, help="Name of the model (e.g. Kimodo-SOMA-RP-v1, etc).", ) parser.add_argument( "--duration", type=str, default="5.0", help="Duration in seconds (default: 5.0). Separate by spaces in a string for different durations per prompts", ) parser.add_argument( "--num_samples", type=int, default=1, help="Number of samples to generate (default: 1)", ) parser.add_argument( "--diffusion_steps", type=int, default=100, help="Number of diffusion steps (default: 100)", ) parser.add_argument( "--num_transition_frames", type=int, default=5, help="Number of frames to help transitioning (default: 5)", ) parser.add_argument( "--constraints", type=str, default=None, help="Saved constraint list", ) parser.add_argument( "--output", type=str, default="output", help="Output stem name: with one sample writes a single file per format (e.g. test.npz, test.csv); with multiple samples creates a folder and writes test_00.npz, test_01.npz, ... inside it. Used for NPZ, AMASS NPZ, CSV, and BVH.", ) parser.add_argument( "--save_example_dir", action="store_true", help=( "Save demo-compatible example directories (each contains motion.npz, constraints.json, meta.json). " "With one sample, writes _example/. With multiple samples, writes " "_examples/_example_00/, _example_01/, ..." ), ) parser.add_argument( "--bvh", action="store_true", help="Also export BVH (SOMA models only); uses the same stem as --output.", ) parser.add_argument( "--bvh_standard_tpose", action="store_true", help="If exporting BVH, export with the rest pose being the standard T-pose rather than the rest pose consistent with the BONES-SEED dataset.", ) parser.add_argument( "--no-postprocess", action="store_true", help="Don't apply motion post-processing to reduce foot skating (ignored for G1)", ) parser.add_argument( "--seed", type=int, default=None, help="Seed for reproducible results", ) parser.add_argument( "--input_folder", type=str, default=None, help="Folder containing meta.json and optional constraints.json. If set, generation settings are loaded from meta.json.", ) parser.add_argument( "--cfg_type", type=str, default=argparse.SUPPRESS, choices=CFG_TYPES, help=( "Classifier-free guidance mode: nocfg (no CFG), regular (single scale on cond vs uncond), " "or separated (custom: separate text and constraint scales). " "Use with --cfg_weight as required by the mode." ), ) parser.add_argument( "--cfg_weight", type=float, nargs="*", default=argparse.SUPPRESS, help=( "CFG scale(s): one float for regular, or two floats [text_weight, constraint_weight] for separated. " "Omit with --cfg_type nocfg. If omitted, two floats alone imply separated; one float alone implies regular." 
), ) return parser.parse_args() def get_texts_and_num_frames_from_prompt(prompt: str, duration: str, fps: float): # Get the texts texts = [text.strip() for text in prompt.split(".")] texts = [text + "." for text in texts if text] nb_prompts = len(texts) # Get the durations if " " not in duration: duration_sec = float(duration) # same for all the prompts num_frames = [int(duration_sec * fps)] * nb_prompts else: durations = duration.split(" ") assert len(durations) == len(texts), "The number of durations should match the number of prompts" num_frames = [int(float(duration.strip()) * fps) for duration in durations] assert len(num_frames) == nb_prompts, "The number of durations should be 1 or match the number of texts" return texts, num_frames def _single_file_path(path: str, ext: str) -> str: """Return path for a single output file (no folder). Adds ext if missing; creates parent dirs if any. """ if not path.endswith(ext): path = path.rstrip(os.sep) + ext parent = os.path.dirname(path) if parent: os.makedirs(parent, exist_ok=True) return path def _output_dir_and_path(path: str, default_base: str, ext: str): """Create output folder from path and return (dir_path, path_for_file_with_suffix, base_name). If path has an extension, folder name is the path stem; else the path is the folder name. base_name is the folder basename for _00, _01, ... when n_samples > 1. """ folder = os.path.splitext(path)[0] if os.path.splitext(path)[1] else path os.makedirs(folder, exist_ok=True) base_name = os.path.basename(folder.rstrip(os.sep)) return folder, os.path.join(folder, default_base + ext), base_name def resolve_cfg_kwargs(args: argparse.Namespace, meta: Optional[Dict[str, Any]]) -> Dict[str, Any]: """Resolve cfg_type / cfg_weight for model(...). Precedence: explicit CLI (--cfg_type / --cfg_weight) overrides meta.json ``cfg``; if neither applies, returns {} so the model uses its own defaults. 
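
    Examples (illustrative; mirrors the precedence rules implemented below):
        --cfg_type regular --cfg_weight 2.5  ->  {"cfg_type": "regular", "cfg_weight": 2.5}
        --cfg_weight 2.0 3.0                 ->  {"cfg_type": "separated", "cfg_weight": [2.0, 3.0]}
        --cfg_type nocfg                     ->  {"cfg_type": "nocfg"}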
""" ns = vars(args) has_type = "cfg_type" in ns has_wflag = "cfg_weight" in ns cli_type = ns.get("cfg_type") cli_w = ns.get("cfg_weight") if has_wflag: if cli_w is None or len(cli_w) == 0: raise ValueError("--cfg_weight requires one float (regular) or two floats (separated).") if has_type and cli_type == "nocfg": if has_wflag: raise ValueError("--cfg_weight is not used with --cfg_type nocfg.") return {"cfg_type": "nocfg"} if has_type or has_wflag: if has_type: eff_type = cli_type if has_wflag: if eff_type == "regular" and len(cli_w) != 1: raise ValueError("--cfg_type regular requires exactly one --cfg_weight value.") if eff_type == "separated" and len(cli_w) != 2: raise ValueError("--cfg_type separated requires exactly two --cfg_weight values.") else: if eff_type == "regular": raise ValueError("--cfg_type regular requires --cfg_weight with one float.") if eff_type == "separated": raise ValueError("--cfg_type separated requires --cfg_weight with two floats.") else: if len(cli_w) == 1: eff_type = "regular" elif len(cli_w) == 2: eff_type = "separated" else: raise ValueError("--cfg_weight expects 1 float (regular) or 2 floats (separated).") if eff_type == "regular": return {"cfg_type": "regular", "cfg_weight": float(cli_w[0])} return {"cfg_type": "separated", "cfg_weight": [float(cli_w[0]), float(cli_w[1])]} if meta and isinstance(meta.get("cfg"), dict): cfg = meta["cfg"] enabled = cfg.get("enabled", True) if not enabled: return {"cfg_type": "nocfg"} return { "cfg_type": "separated", "cfg_weight": [ float(cfg.get("text_weight", 2.0)), float(cfg.get("constraint_weight", 2.0)), ], } return {} def get_generation_inputs(args, fps: float): """Get texts/num_frames and parameter overrides from either CLI or input_folder.""" if args.input_folder is None: if not args.prompt: raise ValueError("Either provide 'prompt' or '--input_folder'.") texts, num_frames = get_texts_and_num_frames_from_prompt(args.prompt, args.duration, fps) return { "texts": texts, "num_frames": num_frames, "num_samples": args.num_samples, "diffusion_steps": args.diffusion_steps, "seed": args.seed, "constraints_path": args.constraints, "meta": None, } meta_path = os.path.join(args.input_folder, "meta.json") meta = load_json(meta_path) texts, durations_sec = load_prompts_from_meta(meta_path) num_frames = [int(float(duration) * fps) for duration in durations_sec] constraints_path = args.constraints default_constraints_path = os.path.join(args.input_folder, "constraints.json") if constraints_path is None and os.path.exists(default_constraints_path): constraints_path = default_constraints_path return { "texts": texts, "num_frames": num_frames, "num_samples": meta.get("num_samples", args.num_samples), "diffusion_steps": meta.get("diffusion_steps", args.diffusion_steps), "seed": meta.get("seed", args.seed), "constraints_path": constraints_path, "meta": meta, } def main(): device = "cuda:0" if torch.cuda.is_available() else "cpu" print(f"Using device: {device}") args = parse_args() # Load model (resolution of name done inside load_model) model, resolved_model = load_model( args.model, device=device, default_family="Kimodo", return_resolved_name=True, ) info = get_model_info(resolved_model) display = info.display_name if info else resolved_model print(f"Loaded model: {display} ({resolved_model})") # Get generation inputs generation_inputs = get_generation_inputs(args, model.fps) texts = generation_inputs["texts"] num_frames = generation_inputs["num_frames"] print("Will generate motions with the following prompts") for text, num_frame in 
zip(texts, num_frames): print(f" '{text}' with {num_frame} frames") # Load constraints constraints_path = generation_inputs["constraints_path"] if constraints_path: constraint_lst = load_constraints_lst(constraints_path, model.skeleton) else: constraint_lst = [] if constraint_lst: print(f"Using {len(constraint_lst)} set of constraints") for constraint in constraint_lst: print(f" {constraint}") if generation_inputs["seed"] is not None: seed_everything(generation_inputs["seed"]) cfg_kwargs = resolve_cfg_kwargs(args, generation_inputs.get("meta")) if cfg_kwargs: ct = cfg_kwargs.get("cfg_type") cw = cfg_kwargs.get("cfg_weight") if cw is not None: print(f"Using CFG: cfg_type={ct!r}, cfg_weight={cw!r}") else: print(f"Using CFG: cfg_type={ct!r}") # G1: postprocessing is disabled (does not work well for this model). use_postprocess = False if "g1" in resolved_model else (not args.no_postprocess) output = model( texts, num_frames, constraint_lst=constraint_lst, num_denoising_steps=generation_inputs["diffusion_steps"], num_samples=generation_inputs["num_samples"], multi_prompt=True, num_transition_frames=args.num_transition_frames, post_processing=use_postprocess, return_numpy=True, **cfg_kwargs, ) n_samples = int(output["posed_joints"].shape[0]) # Parse the output stem once; all formats (NPZ, AMASS NPZ, CSV, BVH) use this base name. output_base = args.output # Save the NPZ output if n_samples == 1: npz_path = _single_file_path(output_base, ".npz") print(f"Saving the npz output to {npz_path}") single = { k: (v[0] if hasattr(v, "shape") and len(v.shape) > 0 and v.shape[0] == n_samples else v) for k, v in output.items() } save_kimodo_npz(npz_path, single) else: out_dir, _, base_name = _output_dir_and_path(output_base, "motion", ".npz") print(f"Saving the npz output to {out_dir}/ ({base_name}_00.npz ...)") for i in range(n_samples): single = { k: (v[i] if hasattr(v, "shape") and len(v.shape) > 0 and v.shape[0] == n_samples else v) for k, v in output.items() } save_kimodo_npz(os.path.join(out_dir, f"{base_name}_{i:02d}.npz"), single) # Save the AMASS NPZ output if resolved_model == "kimodo-smplx-rp": from kimodo.exports.smplx import AMASSConverter converter = AMASSConverter(skeleton=model.skeleton, fps=model.fps) if n_samples == 1: # Use distinct name so AMASS NPZ does not overwrite the main NPZ amass_single_path = _single_file_path(output_base + "_amass", ".npz") print(f"Saving the amass output to {amass_single_path}") converter.convert_save_npz(output, amass_single_path) else: out_dir, _, base_name = _output_dir_and_path(output_base, "amass", ".npz") print(f"Saving the amass output to {out_dir}/ (amass_00.npz ...)") converter.convert_save_npz(output, os.path.join(out_dir, "amass.npz")) # Save the CSV output if resolved_model == "kimodo-g1-rp": from kimodo.exports.mujoco import MujocoQposConverter converter = MujocoQposConverter(model.skeleton) qpos = converter.dict_to_qpos(output, device) if n_samples == 1: csv_path = _single_file_path(output_base, ".csv") print(f"Saving the csv output to {csv_path}") converter.save_csv(qpos, csv_path) else: out_dir, _, base_name = _output_dir_and_path(output_base, "qpos", ".csv") print(f"Saving the csv output to {out_dir}/ ({base_name}_00.csv ...)") converter.save_csv(qpos, os.path.join(out_dir, base_name + ".csv")) # Save the BVH output if args.bvh: skeleton = model.skeleton if "somaskel" not in skeleton.name: print("BVH export is only supported for SOMA skeletons. 
Skipping --bvh.") else: from kimodo.exports.bvh import save_motion_bvh from kimodo.skeleton import SOMASkeleton30, global_rots_to_local_rots if isinstance(skeleton, SOMASkeleton30): # Motion has already been converted to somaskel77 within the model for output skeleton = skeleton.somaskel77.to(device) if n_samples == 1: bvh_path = _single_file_path(output_base, ".bvh") print(f"Saving the BVH output to {bvh_path}") joints_pos = torch.from_numpy(output["posed_joints"][0]).to(device) joints_rot = torch.from_numpy(output["global_rot_mats"][0]).to(device) local_rot_mats = global_rots_to_local_rots(joints_rot, skeleton) root_positions = joints_pos[:, skeleton.root_idx, :] save_motion_bvh( bvh_path, local_rot_mats, root_positions, skeleton=skeleton, fps=model.fps, standard_tpose=args.bvh_standard_tpose, ) else: out_dir, _, base_name = _output_dir_and_path(output_base, "motion", ".bvh") print(f"Saving the BVH output to {out_dir}/ ({base_name}_00.bvh ...)") for i in range(n_samples): joints_pos = torch.from_numpy(output["posed_joints"][i]).to(device) joints_rot = torch.from_numpy(output["global_rot_mats"][i]).to(device) local_rot_mats = global_rots_to_local_rots(joints_rot, skeleton) root_positions = joints_pos[:, skeleton.root_idx, :] save_motion_bvh( os.path.join(out_dir, f"{base_name}_{i:02d}.bvh"), local_rot_mats, root_positions, skeleton=skeleton, fps=model.fps, standard_tpose=args.bvh_standard_tpose, ) # Save the example directory if args.save_example_dir: output_stem = os.path.splitext(output_base)[0].rstrip(os.sep) base_name = os.path.basename(output_stem) if n_samples == 1: parent_dir = None example_dirs = [output_stem + "_example"] else: parent_dir = output_stem + "_examples" if os.path.exists(parent_dir): raise FileExistsError(f"Example directory already exists: {parent_dir}") os.makedirs(parent_dir) example_dirs = [ os.path.join(parent_dir, f"{base_name}_example_{i:02d}") for i in range(n_samples) ] durations_sec = [nf / model.fps for nf in num_frames] if len(texts) == 1: meta_info: dict = {"text": texts[0], "duration": durations_sec[0]} else: meta_info = {"texts": texts, "durations": durations_sec} meta_info["num_samples"] = generation_inputs["num_samples"] if generation_inputs["seed"] is not None: meta_info["seed"] = generation_inputs["seed"] meta_info["diffusion_steps"] = generation_inputs["diffusion_steps"] if cfg_kwargs: cfg_type = cfg_kwargs.get("cfg_type", "nocfg") cfg_weight = cfg_kwargs.get("cfg_weight") if cfg_type == "nocfg": meta_info["cfg"] = {"enabled": False} elif cfg_type == "separated" and isinstance(cfg_weight, list) and len(cfg_weight) == 2: meta_info["cfg"] = { "enabled": True, "text_weight": cfg_weight[0], "constraint_weight": cfg_weight[1], } elif cfg_type == "regular" and cfg_weight is not None: meta_info["cfg"] = { "enabled": True, "text_weight": float(cfg_weight), "constraint_weight": float(cfg_weight), } for i, example_dir in enumerate(example_dirs): if os.path.exists(example_dir): raise FileExistsError(f"Example directory already exists: {example_dir}") os.makedirs(example_dir) sample = { k: (v[i] if hasattr(v, "shape") and len(v.shape) > 0 and v.shape[0] == n_samples else v) for k, v in output.items() } save_kimodo_npz(os.path.join(example_dir, "motion.npz"), sample) if constraints_path: shutil.copy2(constraints_path, os.path.join(example_dir, "constraints.json")) save_json(os.path.join(example_dir, "meta.json"), meta_info) if parent_dir is None: print(f"Saved demo example to {example_dirs[0]}") else: print(f"Saved {n_samples} demo examples to {parent_dir}/") 
if __name__ == "__main__": main() ================================================ FILE: kimodo/scripts/gradio_theme.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import gradio as gr def get_gradio_theme(remove_gradio_footer=False): theme = gr.themes.Base( primary_hue="blue", text_size=gr.themes.Size(lg="16px", md="14px", sm="12px", xl="22px", xs="10px", xxl="35px", xxs="9px"), font=[ gr.themes.GoogleFont("Source Sans Pro"), "BlinkMacSystemFont", "Segoe UI", "Roboto", ], ).set( body_text_color="*neutral_900", body_text_color_subdued="*neutral_500", body_text_color_subdued_dark="*neutral_500", ) css = """ @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700;900&display=swap'); /* Base text */ body, .gradio-container { font-family: 'Source Sans Pro', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen-Sans, Ubuntu, Cantarell, 'Helvetica Neue', sans-serif !important; font-size: 16px !important; } h1 { // font-family: 'Source Sans Pro', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif !important; font-weight: 700 !important; font-size: 2.75rem !important; // margin: 0px; padding: 1.5rem 0px 0px 0px; // line-height: 1.2; } h2 { // font-family: 'Source Sans Pro', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif !important; font-weight: 600 !important; font-size: 1.5rem !important; } """ if remove_gradio_footer: css += """ footer { display: none !important; } """ return theme, css ================================================ FILE: kimodo/scripts/lock_requirements.py ================================================ #!/usr/bin/env python3 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Regenerate `docker_requirements.txt` from `docker_requirements.in` using `uv`, targeting the Docker image runtime, and filter out `torch` + CUDA wheels so Docker doesn't try to reinstall PyTorch. Usage: python3 kimodo/scripts/lock_requirements.py Optional args: --python-version 3.10 --python-platform x86_64-manylinux2014 --in docker_requirements.in --out docker_requirements.txt """ import argparse import shutil import subprocess from pathlib import Path from typing import Iterable DEFAULT_PYTHON_VERSION = "3.10" DEFAULT_PYTHON_PLATFORM = "x86_64-manylinux2014" # Packages to omit from the lockfile because the Docker base image already provides torch+CUDA. OMIT_NAMES = {"torch", "triton", "networkx", "sympy", "mpmath"} OMIT_PREFIXES = ("nvidia-",) def _run(cmd: list[str]) -> None: print("+", " ".join(cmd)) subprocess.run(cmd, check=True) def _ensure_uv() -> None: if shutil.which("uv") is None: raise SystemExit( "ERROR: `uv` is not installed or not on PATH.\n" "Install it (one of):\n" " - pipx install uv\n" " - python -m pip install --user uv\n" "Then rerun this script." ) def _parse_req_name(line: str) -> str: # uv emits `name==version` lines. s = line.strip() if "==" in s: return s.split("==", 1)[0].strip() # Fallback: treat the whole token before space as name. 
return s.split()[0].strip() def _iter_blocks(lines: list[str]) -> Iterable[list[str]]: """Split a docker_requirements.txt into blocks: [top-level req line + indented comments].""" i = 0 n = len(lines) while i < n: line = lines[i] # Header/comments/blank if line.startswith("#") or line.strip() == "": yield [line] i += 1 continue # Top-level requirement line if not line.startswith(" "): block = [line] i += 1 while i < n and (lines[i].startswith(" ") or lines[i].strip() == "" or lines[i].startswith("#")): # Stop if we hit another top-level requirement line if not lines[i].startswith(" ") and not lines[i].startswith("#") and lines[i].strip() != "": break block.append(lines[i]) i += 1 yield block continue # Indented line without a requirement header (shouldn't happen, but keep) yield [line] i += 1 def _should_omit(req_line: str) -> bool: name = _parse_req_name(req_line) if name in OMIT_NAMES: return True for pfx in OMIT_PREFIXES: if name.startswith(pfx): return True return False def filter_lockfile(path: Path) -> None: lines = path.read_text(encoding="utf-8").splitlines(True) out: list[str] = [] inserted_note = False for block in _iter_blocks(lines): first = block[0] # After the uv header lines, insert a short note once. if (not inserted_note) and first.startswith("# This file was autogenerated by uv"): out.extend(block) out.append( "# NOTE: `torch` (and its CUDA wheels) are intentionally omitted from this lockfile.\n" "# The Docker base image (nvcr.io/nvidia/pytorch) already provides a tested PyTorch build.\n" "#\n" ) inserted_note = True continue if first.startswith("#") or first.strip() == "": out.extend(block) continue if _should_omit(first): continue out.extend(block) path.write_text("".join(out), encoding="utf-8") def main() -> None: ap = argparse.ArgumentParser() ap.add_argument("--in", dest="in_file", default="docker_requirements.in") ap.add_argument("--out", dest="out_file", default="docker_requirements.txt") ap.add_argument("--python-version", default=DEFAULT_PYTHON_VERSION) ap.add_argument("--python-platform", default=DEFAULT_PYTHON_PLATFORM) args = ap.parse_args() _ensure_uv() in_path = Path(args.in_file) out_path = Path(args.out_file) if not in_path.exists(): raise SystemExit(f"ERROR: missing {in_path}") _run( [ "uv", "pip", "compile", "-U", str(in_path), "-o", str(out_path), "--python-version", args.python_version, "--python-platform", args.python_platform, ] ) filter_lockfile(out_path) print(f"OK: wrote {out_path} (filtered torch/CUDA wheels)") if __name__ == "__main__": main() ================================================ FILE: kimodo/scripts/motion_convert.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """CLI entry-point for motion format conversion. Library conversion logic lives in :mod:`kimodo.exports.motion_convert_lib`. Format detection utilities live in :mod:`kimodo.exports.motion_formats`. 
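
Example (illustrative):
    python -m kimodo.scripts.motion_convert motion.bvh motion.npz --from soma-bvh --to kimodo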
""" from __future__ import annotations import argparse import sys from kimodo.exports.motion_convert_lib import convert_motion_files def run_convert( input_path: str, output_path: str, from_fmt: str | None, to_fmt: str | None, source_fps: float | None, z_up: bool, mujoco_rest_zero: bool, bvh_standard_tpose: bool = False, ) -> None: """Thin wrapper kept for backward compatibility; delegates to :func:`convert_motion_files`.""" convert_motion_files( input_path, output_path, from_fmt=from_fmt, to_fmt=to_fmt, source_fps=source_fps, z_up=z_up, mujoco_rest_zero=mujoco_rest_zero, bvh_standard_tpose=bvh_standard_tpose, ) def build_argparser() -> argparse.ArgumentParser: p = argparse.ArgumentParser( description="Convert Kimodo NPZ, AMASS NPZ, SOMA BVH, and G1 MuJoCo CSV.", ) p.add_argument("input", help="Input file path") p.add_argument("output", help="Output file path") p.add_argument( "--from", dest="from_fmt", choices=("amass", "kimodo", "soma-bvh", "g1-csv"), default=None, help="Input format (default: infer from file contents/extension)", ) p.add_argument( "--to", dest="to_fmt", choices=("kimodo", "amass", "soma-bvh", "g1-csv"), default=None, help="Output format (default: infer from output extension)", ) p.add_argument( "--source-fps", "--fps", dest="source_fps", type=float, default=None, help=( "Source motion frame rate in Hz (default: auto-detected from " "BVH Frame Time / AMASS mocap_frame_rate, or 30 Hz). " "Kimodo NPZ output is always resampled to 30 Hz." ), ) p.add_argument( "--no-z-up", action="store_true", help="For AMASS paths: disable Z-up transform (treat trans/orient as already Kimodo Y-up).", ) p.add_argument( "--mujoco-rest-zero", action="store_true", default=False, help="For G1 CSV: joint angles relative to MuJoCo rest (must match export).", ) p.add_argument( "--bvh_standard_tpose", action="store_true", default=False, help="If input or output is BVH: the BVH file uses the standard T-pose as its rest pose instead of the BONES-SEED rest pose.", ) return p def main(argv: list[str] | None = None) -> int: args = build_argparser().parse_args(argv) try: convert_motion_files( args.input, args.output, from_fmt=args.from_fmt, to_fmt=args.to_fmt, source_fps=args.source_fps, z_up=not args.no_z_up, mujoco_rest_zero=args.mujoco_rest_zero, bvh_standard_tpose=args.bvh_standard_tpose, ) except Exception as e: print(f"Error: {e}", file=sys.stderr) return 1 return 0 if __name__ == "__main__": raise SystemExit(main()) ================================================ FILE: kimodo/scripts/mujoco_load.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import time import mujoco import mujoco.viewer import numpy as np from kimodo.assets import skeleton_asset_path qpos = np.loadtxt("motion.csv", delimiter=",") model = mujoco.MjModel.from_xml_path(str(skeleton_asset_path("g1skel34", "xml", "g1.xml"))) data = mujoco.MjData(model) fps = 30 # adjust to your intended playback rate with mujoco.viewer.launch_passive(model, data) as viewer: # loop the motion while viewer.is_running(): for frame in qpos: data.qpos[:] = frame mujoco.mj_forward(model, data) viewer.sync() time.sleep(1.0 / fps) ================================================ FILE: kimodo/scripts/run_text_encoder_server.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0

import argparse
import os

import gradio as gr
import numpy as np

from kimodo.model import resolve_target

from .gradio_theme import get_gradio_theme

os.environ["HF_ENABLE_PARALLEL_LOADING"] = "YES"

DEFAULT_TEXT = "A person walks and falls to the ground."
DEFAULT_SERVER_NAME = "0.0.0.0"
DEFAULT_SERVER_PORT = 9550
DEFAULT_TMP_FOLDER = "/tmp/text_encoder/"
DEFAULT_TEXT_ENCODER = "llm2vec"

TEXT_ENCODER_PRESETS = {
    "llm2vec": {
        "target": "kimodo.model.LLM2VecEncoder",
        "kwargs": {
            "base_model_name_or_path": "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp",
            "peft_model_name_or_path": "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised",
            "dtype": "bfloat16",
            "llm_dim": 4096,
            "device": "auto",
        },
        "display_name": "LLM2Vec",
    }
}


class DemoWrapper:
    def __init__(self, text_encoder, tmp_folder):
        self.text_encoder = text_encoder
        self.tmp_folder = tmp_folder

    def __call__(self, text, filename, progress=gr.Progress()):
        # Compute text embedding
        tensor, length = self.text_encoder(text)
        embedding = tensor[:length]
        embedding = embedding.cpu().numpy()

        # Save text embedding
        path = os.path.join(self.tmp_folder, filename)
        np.save(path, embedding)

        output_title = gr.Markdown(visible=True)
        output_text = gr.Markdown(visible=True, value=f"Text: {text}")
        download = gr.DownloadButton(visible=True, value=path)
        return download, output_title, output_text


def _get_env(name: str, default):
    return os.getenv(name, default)


def _build_text_encoder(name: str, fp32: bool = False):
    if name not in TEXT_ENCODER_PRESETS:
        available = ", ".join(sorted(TEXT_ENCODER_PRESETS))
        raise ValueError(f"Unknown TEXT_ENCODER='{name}'. Available: {available}")
    preset = TEXT_ENCODER_PRESETS[name]
    target_cls = resolve_target(preset["target"])
    if fp32:
        preset["kwargs"]["dtype"] = "float32"
    return target_cls(**preset["kwargs"])


def parse_args():
    parser = argparse.ArgumentParser(description="Run text encoder Gradio server.")
    parser.add_argument(
        "--text-encoder",
        default=_get_env("TEXT_ENCODER", DEFAULT_TEXT_ENCODER),
        choices=sorted(TEXT_ENCODER_PRESETS.keys()),
        help="Text encoder preset.",
    )
    parser.add_argument(
        "--tmp-folder",
        default=_get_env("TEXT_ENCODER_TMP_FOLDER", DEFAULT_TMP_FOLDER),
    )
    parser.add_argument(
        "--fp32",
        action="store_true",
        help="Uses fp32 for the text encoder rather than the default bfloat16.",
    )
    return parser.parse_args()


def main():
    args = parse_args()
    server_name = _get_env("GRADIO_SERVER_NAME", DEFAULT_SERVER_NAME)
    server_port = int(_get_env("GRADIO_SERVER_PORT", DEFAULT_SERVER_PORT))
    theme, css = get_gradio_theme()
    os.makedirs(args.tmp_folder, exist_ok=True)
    text_encoder = _build_text_encoder(args.text_encoder, args.fp32)
    display_name = TEXT_ENCODER_PRESETS[args.text_encoder]["display_name"]
    demo_wrapper_fn = DemoWrapper(text_encoder, args.tmp_folder)

    with gr.Blocks(title="Text encoder", css=css, theme=theme) as demo:
        gr.Markdown(f"# Text encoder: {display_name}")
        gr.Markdown("## Description")
        gr.Markdown("Get an embedding from a text.")
        gr.Markdown("## Inputs")
        with gr.Row():
            text = gr.Textbox(
                placeholder="Type the motion you want to generate with a sentence",
                show_label=True,
                label="Text prompt",
                value=DEFAULT_TEXT,
                type="text",
            )
        with gr.Row(scale=3):
            with gr.Column(scale=1):
                btn = gr.Button("Encode", variant="primary")
            with gr.Column(scale=1):
                clear = gr.Button("Clear", variant="secondary")
            with gr.Column(scale=3):
                pass
        output_title = gr.Markdown("## Outputs", visible=False)
        output_text = gr.Markdown("", visible=False)
        with gr.Row(scale=3):
            with gr.Column(scale=1):
                download =
gr.DownloadButton("Download", variant="primary", visible=False) with gr.Column(scale=4): pass filename = gr.Textbox( visible=False, value="embedding.npy", ) def clear_fn(): return [ gr.DownloadButton(visible=False), gr.Markdown(visible=False), gr.Markdown(visible=False), ] outputs = [download, output_title, output_text] gr.on( triggers=[text.submit, btn.click], fn=clear_fn, inputs=None, outputs=outputs, ).then( fn=demo_wrapper_fn, inputs=[text, filename], outputs=outputs, ) def download_file(): return gr.DownloadButton() download.click( fn=download_file, inputs=None, outputs=[download], ) clear.click(fn=clear_fn, inputs=None, outputs=outputs) demo.launch(server_name=server_name, server_port=server_port) if __name__ == "__main__": main() ================================================ FILE: kimodo/skeleton/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Skeleton definitions and utilities used across kimodo.""" from .base import SkeletonBase from .definitions import ( G1Skeleton34, SMPLXSkeleton22, SOMASkeleton30, SOMASkeleton77, ) from .kinematics import batch_rigid_transform, fk from .registry import build_skeleton from .transforms import global_rots_to_local_rots, to_standard_tpose __all__ = [ "SkeletonBase", "G1Skeleton34", "SOMASkeleton30", "SOMASkeleton77", "SMPLXSkeleton22", "batch_rigid_transform", "fk", "build_skeleton", "global_rots_to_local_rots", "to_standard_tpose", ] ================================================ FILE: kimodo/skeleton/base.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Base skeleton class: hierarchy, joint metadata, and helpers for kinematics and motion.""" from pathlib import Path from typing import Optional import torch from kimodo.assets import skeleton_asset_path from .kinematics import fk from .transforms import ( from_standard_tpose, global_rots_to_local_rots, to_standard_tpose, ) class SkeletonBase(torch.nn.Module): """Base class that stores a skeleton hierarchy and helper metadata. Subclasses define the static joint layout (joint names and parent links) and semantic groups (feet, hands, hips). This class builds index mappings, parent tensors, and convenience helpers used by kinematics, constraints, and motion conversion utilities. """ # these should be defined in the subclass name = None bone_order_names_with_parents = None bone_order_names_no_root = None root_idx = None foot_joint_names = None foot_joint_idx = None hip_joint_names = None # in order [right, left] hip_joint_idx = None # in order [right, left] def __init__( self, folder: Optional[str] = None, name: Optional[str] = None, load: bool = True, **kwargs, # to catch addition args in configs ): """Initialize a skeleton instance and optional neutral-pose assets. Args: folder: Folder containing serialized skeleton assets (for example `joints.p` and optional `standard_t_pose_global_offsets_rots.p`). name: Optional runtime name used to validate subclass compatibility. load: Whether to load tensor assets from `folder`. **kwargs: Unused extra config keys kept for config compatibility. 
""" super().__init__() if name is not None: # Check that the name is not too far from the actual skeleton class name assert self.name in name self.name = name if folder is None: # Take the skeleton asset folder of the repo from the name # in case we don't override it folder = str(skeleton_asset_path(self.name)) self.folder = folder self.dim = len(self.bone_order_names_with_parents) if load and folder is not None: pfolder = Path(folder) neutral_joints = torch.load(pfolder / "joints.p").squeeze() self.register_buffer("neutral_joints", neutral_joints, persistent=False) if (pfolder / "bvh_joints.p").exists(): bvh_neutral_joints = torch.load(pfolder / "bvh_joints.p").squeeze() self.register_buffer("bvh_neutral_joints", bvh_neutral_joints, persistent=False) global_offset_path = pfolder / "standard_t_pose_global_offsets_rots.p" if global_offset_path.exists(): global_rot_offsets = torch.load(global_offset_path).squeeze() self.register_buffer("global_rot_offsets", global_rot_offsets, persistent=False) # Usefull for g1, where the rest pose is not zero baked_rest_path = pfolder / "rest_pose_local_rot.p" if baked_rest_path.exists(): rest_pose_local_rot = torch.load(baked_rest_path).squeeze() self.register_buffer("rest_pose_local_rot", rest_pose_local_rot, persistent=False) self.bone_order_names = [x for x, y in self.bone_order_names_with_parents] self.bone_parents = dict(self.bone_order_names_with_parents) self.bone_index = {x: idx for idx, x in enumerate(self.bone_order_names)} self.bone_order_names_index = self.bone_index # create the parents tensor on the fly joint_parents = torch.tensor( [-1 if (y := self.bone_parents[x]) is None else self.bone_index[y] for x in self.bone_order_names] ) self.register_buffer("joint_parents", joint_parents, persistent=False) self.nbjoints = len(self.bone_order_names) # check lengths assert self.nbjoints == len(self.joint_parents) if "neutral_joints" in self.__dict__: assert self.nbjoints == len(self.neutral_joints) root_indices = torch.where(joint_parents == -1)[0] assert len(root_indices) == 1 # should be one root only self.root_idx = root_indices[0].item() if "neutral_joints" in self.__dict__: assert (self.neutral_joints[0] == 0).all() # remove the root self.bone_order_names_no_root = ( self.bone_order_names[: self.root_idx] + self.bone_order_names[self.root_idx + 1 :] ) self.foot_joint_names = self.left_foot_joint_names + self.right_foot_joint_names self.foot_joint_names_index = {x: idx for idx, x in enumerate(self.foot_joint_names)} self.left_foot_joint_idx = [ self.bone_order_names.index(foot_joint) for foot_joint in self.left_foot_joint_names ] self.right_foot_joint_idx = [ self.bone_order_names.index(foot_joint) for foot_joint in self.right_foot_joint_names ] self.foot_joint_idx = self.left_foot_joint_idx + self.right_foot_joint_idx self.hip_joint_idx = [self.bone_order_names.index(hip_joint) for hip_joint in self.hip_joint_names] def expand_joint_names(self, joint_names): """Expand base EE names [LeftFoot, RightFoot, LeftHand, RightHand] actual joint names to constrain position and rotations. 
    def expand_joint_names(self, joint_names):
        """Expand base EE names [LeftFoot, RightFoot, LeftHand, RightHand, Hips] into the
        actual joint names used to constrain positions and rotations.

        Args:
            joint_names: list of base EE names to constrain

        Returns:
            rot_joint_names: list of joint names to constrain rotations
            pos_joint_names: list of joint names to constrain positions
        """
        base_ee = ["LeftFoot", "RightFoot", "LeftHand", "RightHand", "Hips"]
        pelvis_name = self.bone_order_names[self.root_idx]
        base_pos_names = [
            self.left_foot_joint_names,
            self.right_foot_joint_names,
            self.left_hand_joint_names,
            self.right_hand_joint_names,
            [pelvis_name],
        ]
        # base of each chain
        base_rot_names = [
            self.left_foot_joint_names[:1],
            self.right_foot_joint_names[:1],
            self.left_hand_joint_names[:1],
            self.right_hand_joint_names[:1],
            [pelvis_name],
        ]
        rot_joint_names = []
        pos_joint_names = []
        # loop through each EE joint group to constrain in the current keyframe
        for jname in joint_names:
            idx = base_ee.index(jname)
            rot_joint_names += base_rot_names[idx]
            pos_joint_names += base_pos_names[idx]
        return rot_joint_names, pos_joint_names

    def expand_joint_names_batched(self, joint_names):
        """Expand base EE names [LeftFoot, RightFoot, LeftHand, RightHand, Hips] into the
        actual joint names used to constrain positions and rotations, per keyframe.

        Args:
            joint_names: list of lists of base EE names to constrain (one list per keyframe)

        Returns:
            rot_joint_names: list of lists of joint names to constrain rotations
            pos_joint_names: list of lists of joint names to constrain positions
        """
        base_ee = ["LeftFoot", "RightFoot", "LeftHand", "RightHand", "Hips"]
        pelvis_name = self.bone_order_names[self.root_idx]
        base_pos_names = [
            self.left_foot_joint_names,
            self.right_foot_joint_names,
            self.left_hand_joint_names,
            self.right_hand_joint_names,
            [pelvis_name],
        ]
        # base of each chain
        base_rot_names = [
            self.left_foot_joint_names[:1],
            self.right_foot_joint_names[:1],
            self.left_hand_joint_names[:1],
            self.right_hand_joint_names[:1],
            [pelvis_name],
        ]
        # loop through each keyframe
        rot_joint_names = []
        pos_joint_names = []
        for key_joint_names in joint_names:
            key_rot_names = []
            key_pos_names = []
            # loop through each EE joint group to constrain in the current keyframe
            for jname in key_joint_names:
                idx = base_ee.index(jname)
                key_rot_names += base_rot_names[idx]
                key_pos_names += base_pos_names[idx]
            rot_joint_names.append(key_rot_names)
            pos_joint_names.append(key_pos_names)
        return rot_joint_names, pos_joint_names

    def __repr__(self):
        if self.folder is None:
            return f"{self.__class__.__name__}()"
        return f'{self.__class__.__name__}(folder="{self.folder}")'

    @property
    def device(self):
        """Device where neutral-joint buffers are stored.

        Returns 'cpu' if neutral_joints is not present.
        """
        if getattr(self, "neutral_joints", None) is None:
            return "cpu"
        return self.neutral_joints.device

    def fk(self, local_joint_rots: torch.Tensor, root_positions: torch.Tensor):
        """Run forward kinematics for this skeleton layout.

        Args:
            local_joint_rots: Local joint rotation matrices with shape `(..., J, 3, 3)`.
            root_positions: Root translations with shape `(..., 3)`.

        Returns:
            Tuple of `(global_joint_rots, posed_joints, posed_joints_norootpos)`.
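
        Example (illustrative; ``skel`` stands for any loaded skeleton instance):
            T = 30
            identity = torch.eye(3).expand(T, skel.nbjoints, 3, 3)  # rest-pose rotations
            root = torch.zeros(T, 3)
            global_rots, joints, joints_norootpos = skel.fk(identity, root)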
""" global_joint_rots, posed_joints, posed_joints_norootpos = fk(local_joint_rots, root_positions, self) return global_joint_rots, posed_joints, posed_joints_norootpos def to_standard_tpose(self, local_rot_mats: torch.Tensor): """Convert local rotations into the skeleton's standard T-pose frame.""" return to_standard_tpose(local_rot_mats, self) def from_standard_tpose(self, local_rot_mats: torch.Tensor): """Convert local rotations from the skeleton's standard T-pose frame.""" return from_standard_tpose(local_rot_mats, self) def global_rots_to_local_rots(self, global_joint_rots: torch.Tensor): """Convert global joint rotations to local rotations for this hierarchy.""" return global_rots_to_local_rots(global_joint_rots, self) def get_skel_slice(self, skeleton: "SkeletonBase"): """Build index mapping from another skeleton into this skeleton order. Args: skeleton: Source skeleton whose joint order is used by input tensors. Returns: A list of source indices ordered as `self.bone_order_names`. Raises: ValueError: If at least one required joint is missing from `skeleton`. """ try: skel_slice = [skeleton.bone_index[x] for x in self.bone_order_names] except KeyError: raise ValueError("The current skeleton contain joints that are not in the input") return skel_slice ================================================ FILE: kimodo/skeleton/bvh.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """BVH parsing utilities and skeleton/animation conversion helpers.""" import re from typing import Optional, Tuple import numpy as np import torch from scipy.spatial.transform import Rotation class BvhNode: """Lightweight tree node used to represent parsed BVH hierarchy lines.""" def __init__(self, value=[], parent=None): """Create a node from tokenized BVH line values.""" self.value = value self.children = [] self.parent = parent if self.parent: self.parent.add_child(self) def add_child(self, item): """Attach a child node and set its parent reference.""" item.parent = self self.children.append(item) def filter(self, key): """Yield direct children whose first token matches `key`.""" for child in self.children: if child.value[0] == key: yield child def __iter__(self): for child in self.children: yield child def __getitem__(self, key): """Return all tokens following `key` from the first matching child node.""" for child in self.children: for index, item in enumerate(child.value): if item == key: if index + 1 >= len(child.value): return None else: return child.value[index + 1 :] raise IndexError("key {} not found".format(key)) def __repr__(self): return str(" ".join(self.value)) @property def name(self): """Joint name for `ROOT`/`JOINT` entries.""" return self.value[1] class Bvh: """Parsed BVH file with hierarchy graph and per-frame channel values.""" def __init__(self, data: str, backend: Optional[str] = "graph"): """ Args: data: Raw BVH file content. backend: Parsing mode. `"graph"` keeps list-based frame storage, while `"np"` precomputes a NumPy array and index caches. 
""" self.data = data self.root = BvhNode() self.frames = [] self.backend = backend self.tokenize() if self.backend == "np": # cache important info for quick access later self.build_data_array() elif self.backend == "graph": pass else: raise ValueError(f"Unknown backend for BVH loading: {backend}") def build_data_array(self): """Build cached channel indices and contiguous frame data for `"np"` backend.""" joints = self.get_joints() self.joint2idx = dict() self.joint2channels = dict() cur_idx = 0 for joint in joints: self.joint2idx[joint.value[1]] = cur_idx cur_idx += int(joint["CHANNELS"][0]) self.joint2channels[joint.value[1]] = joint["CHANNELS"][1:] self.np_data_array = np.array(self.frames, dtype=np.float32) def tokenize(self): """Tokenize BVH text and populate hierarchy plus frame values.""" first_round = [] accumulator = "" for char in self.data: if char not in ("\n", "\r"): accumulator += char elif accumulator: first_round.append(re.split("\\s+", accumulator.strip())) accumulator = "" node_stack = [self.root] frame_time_found = False node = None for item in first_round: if frame_time_found: self.frames.append(item) continue key = item[0] if key == "{": node_stack.append(node) elif key == "}": node_stack.pop() else: node = BvhNode(item) # print("new node: ", node, "\nparent: ", node_stack[-1]) node_stack[-1].add_child(node) if item[0] == "Frame" and item[1] == "Time:": frame_time_found = True def search(self, *items): """Depth-first search for nodes matching a prefix of tokens.""" found_nodes = [] def check_children(node): if len(node.value) >= len(items): failed = False for index, item in enumerate(items): if node.value[index] != item: failed = True break if not failed: found_nodes.append(node) for child in node: check_children(child) check_children(self.root) return found_nodes def get_joints(self): """Return all `ROOT`/`JOINT` hierarchy joints in BVH traversal order.""" joints = [] def iterate_joints(joint): joints.append(joint) for child in joint.filter("JOINT"): iterate_joints(child) iterate_joints(next(self.root.filter("ROOT"))) return joints def get_joints_names(self): """Return joint names in the same order as :meth:`get_joints`.""" joints = [] def iterate_joints(joint): joints.append(joint.value[1]) for child in joint.filter("JOINT"): iterate_joints(child) iterate_joints(next(self.root.filter("ROOT"))) return joints def joint_direct_children(self, name): """Return direct child joints of the given joint name.""" joint = self.get_joint(name) return [child for child in joint.filter("JOINT")] def get_joint_index(self, name): """Return hierarchy index of the named joint.""" return self.get_joints().index(self.get_joint(name)) def get_joint(self, name): """Return hierarchy node for a joint name.""" found = self.search("ROOT", name) if not found: found = self.search("JOINT", name) if found: return found[0] raise LookupError("joint not found") def joint_offset(self, name, idx=[0, 1, 2]): """Return selected `OFFSET` components for a joint.""" joint = self.get_joint(name) offset = joint["OFFSET"] if len(offset) < max(idx): return None return (float(offset[idx[0]]), float(offset[idx[1]]), float(offset[idx[2]])) def joint_offset_rot(self, name): """Return optional rotational offset components from custom BVH files.""" return self.joint_offset(name, idx=[3, 4, 5]) def joint_channels(self, name): """Return channel names declared for a joint.""" if self.backend == "np": return self.joint2channels[name] else: joint = self.get_joint(name) return joint["CHANNELS"][1:] def 
    def get_joint_channels_index(self, joint_name):
        """Return the flattened starting channel index for one joint."""
        if self.backend == "np":
            return self.joint2idx[joint_name]
        else:
            index = 0
            for joint in self.get_joints():
                if joint.value[1] == joint_name:
                    return index
                index += int(joint["CHANNELS"][0])
            raise LookupError("joint not found")

    def get_joint_channel_index(self, joint, channel):
        """Return per-joint channel offset for a specific channel name."""
        channels = self.joint_channels(joint)
        if channel in channels:
            channel_index = channels.index(channel)
        else:
            raise ValueError(f"Channel {channel} not found in {channels}")
        return channel_index

    def frame_joint_channel(self, frame_index, joint, channel, value=None):
        """Return one channel value for one joint at one frame index."""
        joint_index = self.get_joint_channels_index(joint)
        channel_index = self.get_joint_channel_index(joint, channel)
        if channel_index == -1 and value is not None:
            return value
        if self.backend == "np":
            return self.np_data_array[frame_index, joint_index + channel_index]
        else:
            return float(self.frames[frame_index][joint_index + channel_index])

    def frame_joint_channels(self, frame_index, joint, channels, value=None):
        """Get single-frame data for one specific joint from multiple specific
        channels (e.g. Xrotation, Yrotation, Zrotation)."""
        values = []
        joint_index = self.get_joint_channels_index(joint)
        if self.backend == "np":
            channel_idx = [self.get_joint_channel_index(joint, channel) for channel in channels]
            channel_idx = np.array(channel_idx) + joint_index
            values = self.np_data_array[frame_index, channel_idx]
        else:
            for channel in channels:
                channel_index = self.get_joint_channel_index(joint, channel)
                if channel_index == -1 and value is not None:
                    values.append(value)
                else:
                    values.append(float(self.frames[frame_index][joint_index + channel_index]))
        return values

    def frames_joint_channels(self, joint, channels, value=None):
        """Get all frame data for one joint from multiple channels (e.g.
Xrotation, Yrotation, Zrotation).""" joint_index = self.get_joint_channels_index(joint) if self.backend == "np": channel_idx = [self.get_joint_channel_index(joint, channel) for channel in channels] channel_idx = np.array(channel_idx) + joint_index all_frames = self.np_data_array[:, channel_idx] else: all_frames = [] for frame in self.frames: values = [] for channel in channels: channel_index = self.get_joint_channel_index(joint, channel) if channel_index == -1 and value is not None: values.append(value) else: values.append(float(frame[joint_index + channel_index])) all_frames.append(values) return all_frames def frames_joints_channels(self, joint_names, channels): """Get all frames for all specified joints with one specified set of channels.""" if self.backend != "np": raise NotImplementedError("Only np backend is supported for this function") joint_indices = [(joint_name, self.joint2idx[joint_name]) for joint_name in joint_names] data_indices = [] for joint_name, joint_idx in joint_indices: channel_indices = [self.get_joint_channel_index(joint_name, channel) for channel in channels] data_indices.extend([joint_idx + channel_idx for channel_idx in channel_indices]) all_frames = self.np_data_array[:, data_indices] all_frames = all_frames.reshape(-1, len(joint_names), len(channels)) return all_frames def joint_parent(self, name): """Return parent joint node, or `None` for the root.""" joint = self.get_joint(name) if joint.parent == self.root: return None return joint.parent def joint_parent_index(self, name): """Return parent joint index, or `-1` for the root.""" joint = self.get_joint(name) if joint.parent == self.root: return -1 return self.get_joints().index(joint.parent) @property def nframes(self): """Number of motion frames declared in the BVH header.""" try: return int(next(self.root.filter("Frames:")).value[1]) except StopIteration: raise LookupError("number of frames not found") @property def frame_time(self): """Frame duration in seconds declared in the BVH header.""" try: return float(next(self.root.filter("Frame")).value[2]) except StopIteration: raise LookupError("frame time not found") class Bone: """Container for one skeleton bone and its kinematic metadata.""" def __init__(self): # original bone info self.id = None self.name = None self.orient = np.identity(3) self.dof_index = [] self.channels = [] # bvh only self.lb = [] self.ub = [] self.parent = None self.child = [] # asf specific self.dir = np.zeros(3) self.len = 0 # bvh specific self.offset = np.zeros(3) # default offset for position self.offset_rot = None # rotation for custom nv bvh # inferred info self.pos = np.zeros(3) self.end = np.zeros(3) def __repr__(self): return f"{self.name}" class SkeletonBvh: """Skeleton structure reconstructed from BVH hierarchy metadata.""" def __init__(self): self.bones = [] self.name2bone = {} self.mass_scale = 1.0 self.len_scale = 1.0 self.dof_name = ["x", "y", "z"] self.root = None def get_bones_names(self): """Return bone names in skeleton order.""" return [x.name for x in self.bones] def get_parent_indices(self): """Return parent index array aligned with `self.bones`.""" parent_indices = [-1] * len(self.bones) for bone in self.bones: if bone.parent: parent_indices[bone.id] = bone.parent.id return parent_indices def get_neutral_joints(self): """Return neutral/rest joint positions as a NumPy array `(J, 3)`.""" joints = [] for bone in self.bones: joints.append(bone.pos) joints = np.stack(joints, axis=0) return joints def load_from_bvh(self, fname, exclude_bones=None, spec_channels=None, 
mocap=None): """Load skeleton hierarchy and rest offsets from a BVH file. Args: fname: Path to a BVH file (ignored when *mocap* is given). exclude_bones: Bone-name substrings to ignore while constructing the skeleton. spec_channels: Optional per-joint channel overrides. mocap: Pre-parsed :class:`Bvh` object. When provided the file is not re-read from disk. """ if exclude_bones is None: exclude_bones = {} if spec_channels is None: spec_channels = dict() if mocap is None: with open(fname) as f: mocap = Bvh(f.read()) joint_names = list( filter( lambda x: all([t not in x for t in exclude_bones]), mocap.get_joints_names(), ) ) dof_ind = {"x": 0, "y": 1, "z": 2} self.len_scale = 1.0 self.root = Bone() self.root.id = 0 self.root.name = joint_names[0] self.root.channels = mocap.joint_channels(self.root.name) self.root.offset = np.array(mocap.joint_offset(self.root.name)) * self.len_scale self.root.offset_rot = mocap.joint_offset_rot(self.root.name) if self.root.offset_rot is not None: self.root.offset_rot = np.array(self.root.offset_rot) # self.root.offset = np.zeros_like(self.root.offset) # TODO: remove this self.name2bone[self.root.name] = self.root self.bones.append(self.root) for i, joint in enumerate(joint_names[1:]): bone = Bone() bone.id = i + 1 bone.name = joint bone.channels = spec_channels[joint] if joint in spec_channels.keys() else mocap.joint_channels(joint) bone.dof_index = [dof_ind[x[0].lower()] for x in bone.channels] bone.offset = np.array(mocap.joint_offset(joint)) * self.len_scale bone.offset_rot = mocap.joint_offset_rot(joint) if bone.offset_rot is not None: bone.offset_rot = np.array(bone.offset_rot) bone.lb = [-180.0] * 3 bone.ub = [180.0] * 3 self.bones.append(bone) self.name2bone[joint] = bone # for bone in self.bones: # print(bone.name, bone.channels, bone.offset) for bone in self.bones[1:]: parent_name = mocap.joint_parent(bone.name).name if parent_name in self.name2bone.keys(): bone_p = self.name2bone[parent_name] bone_p.child.append(bone) bone.parent = bone_p self.forward_bvh(self.root) for bone in self.bones: if len(bone.child) == 0: child_vals = [str(node) for node in mocap.get_joint(bone.name).children] if "End Site" in child_vals: end_site_idx = child_vals.index("End Site") end_site_offset = mocap.get_joint(bone.name).children[end_site_idx]["OFFSET"] bone.end = bone.pos + np.array([float(x) for x in end_site_offset]) * self.len_scale else: pass else: bone.end = sum([bone_c.pos for bone_c in bone.child]) / len(bone.child) def forward_bvh(self, bone): """Recursively accumulate absolute joint positions from local offsets.""" if bone.parent: bone.pos = bone.parent.pos + bone.offset else: bone.pos = bone.offset for bone_c in bone.child: self.forward_bvh(bone_c) def load_bvh_animation( fname: str, skeleton: SkeletonBvh, rot_order: Optional[str] = "native", backend: Optional[str] = "np", return_quat: Optional[bool] = False, mocap: Optional["Bvh"] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: """Load motion channels from BVH into root translations and joint rotations. Args: fname: Full path to the BVH file (ignored when *mocap* is given). skeleton: Parsed neutral skeleton built from compatible BVH hierarchy. rot_order: Euler order to use for conversion (`"native"` keeps BVH order). backend: BVH parser backend (`"np"` or `"graph"`). return_quat: If `True`, return quaternions instead of rotation matrices. mocap: Pre-parsed :class:`Bvh` object. When provided the file is not re-read from disk. 
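
    Note:
        All joints are assumed to share one rotation-channel ordering; the
        `"graph"` backend asserts this per joint, while the `"np"` backend
        assumes it without checking.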
Returns: Root translations `(T, 3)` and joint rotations `(T, J, 3, 3)` or `(T, J, 4)` when `return_quat=True`. """ if mocap is None: with open(fname) as f: mocap = Bvh(f.read(), backend=backend) # assume all joints are same ordering, load in with native ordering root_channels = mocap.joint_channels(skeleton.root.name) pos_channels = [channel for channel in root_channels if channel.endswith("position")] rot_channels = [channel for channel in root_channels if channel.endswith("rotation")] root_trans = np.array(mocap.frames_joint_channels(skeleton.root.name, pos_channels)) effective_backend = mocap.backend if effective_backend == "np": # NOTE: assumes rot channel ordering is the same for all joints joint_eulers = mocap.frames_joints_channels(skeleton.get_bones_names(), rot_channels) joint_eulers = np.deg2rad(joint_eulers) elif effective_backend == "graph": joint_eulers = [] for bone in skeleton.bones: bone_channels = mocap.joint_channels(bone.name) bone_rot_channels = [channel for channel in bone_channels if channel.endswith("rotation")] assert bone_rot_channels == rot_channels, "Rotation channel ordering is not consistent across joints!" # use native rotation order euler = np.deg2rad(np.array(mocap.frames_joint_channels(bone.name, rot_channels))) joint_eulers.append(euler) joint_eulers = np.stack(joint_eulers, axis=1) else: raise ValueError(f"Unknown backend for BVH loading: {effective_backend}") if rot_order == "native": rot_order = "" for axis in rot_channels: rot_order += axis[0] else: # need to reorder dims ordered_joint_eulers = [] for axis in rot_order: i = rot_channels.index(axis + "rotation") ordered_joint_eulers.append(joint_eulers[..., i]) joint_eulers = np.stack(ordered_joint_eulers, axis=-1) rotations = Rotation.from_euler(rot_order, joint_eulers.reshape(-1, 3)) if return_quat: joint_rots = rotations.as_quat(scalar_first=True).reshape(joint_eulers.shape[:-1] + (4,)) else: joint_rots = rotations.as_matrix().reshape(joint_eulers.shape[:-1] + (3, 3)) return root_trans, joint_rots def parse_bvh_motion(file_path_input: str, parse_neutral_joints: bool = False): """Parse a BVH motion into tensors used by kimodo motion pipelines. Args: file_path_input: Path to input BVH file. parse_neutral_joints: If `True`, also return neutral joints in meters. Returns: ``(local_rot_mats, root_trans, fps)`` or ``(local_rot_mats, root_trans, fps, neutral_joints)`` when requested. 
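
    Example:
        A minimal usage sketch; the file path is illustrative::

            local_rot_mats, root_trans, fps = parse_bvh_motion("clip.bvh")
            # local_rot_mats: (T, J, 3, 3) rotation matrices
            # root_trans: (T, 3) root translations in meters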
""" with open(file_path_input) as f: mocap = Bvh(f.read(), backend="np") fps = 1.0 / mocap.frame_time skeletonBVH = SkeletonBvh() exclude_bones = {"Root"} skeletonBVH.load_from_bvh(file_path_input, exclude_bones=exclude_bones, mocap=mocap) root_trans, local_rot_mats = load_bvh_animation(file_path_input, skeletonBVH, mocap=mocap) root_trans *= 0.01 # unit change: cm -> m root_trans = torch.tensor(root_trans) local_rot_mats = torch.tensor(local_rot_mats) # Don't parse neutral_joints here # it is not actually needed right now: # the skeleton is always the same, and saved in the folder # carefull: the one saved in the folder it relative to the standard t_pose # whereas the parsed one is not if not parse_neutral_joints: return local_rot_mats, root_trans, fps neutral_joints = skeletonBVH.get_neutral_joints() neutral_joints *= 0.01 # unit change: cm -> m # remove the root position of the skeleton # (it is already "included" in the root_translation) root_idx = 0 neutral_joints = torch.tensor(neutral_joints - neutral_joints[root_idx]) return local_rot_mats, root_trans, fps, neutral_joints ================================================ FILE: kimodo/skeleton/definitions.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Concrete skeleton definitions: SOMA, G1, SMPLX with joint names and hierarchy.""" from pathlib import Path import numpy as np import torch from ..tools import ensure_batched from .base import SkeletonBase class SOMASkeleton77(SkeletonBase): """High-detail 77-joint SOMA skeleton with full finger and toe chains.""" name = "somaskel77" right_foot_joint_names = [ "RightFoot", "RightToeBase", "RightToeEnd", ] # in order of chain left_foot_joint_names = [ "LeftFoot", "LeftToeBase", "LeftToeEnd", ] # in order of chain right_hand_joint_names = [ "RightHand", "RightHandThumb1", "RightHandThumb2", "RightHandThumb3", "RightHandThumbEnd", "RightHandIndex1", "RightHandIndex2", "RightHandIndex3", "RightHandIndex4", "RightHandIndexEnd", "RightHandMiddle1", "RightHandMiddle2", "RightHandMiddle3", "RightHandMiddle4", "RightHandMiddleEnd", "RightHandRing1", "RightHandRing2", "RightHandRing3", "RightHandRing4", "RightHandRingEnd", "RightHandPinky1", "RightHandPinky2", "RightHandPinky3", "RightHandPinky4", "RightHandPinkyEnd", ] # in order of chain left_hand_joint_names = [ "LeftHand", "LeftHandThumb1", "LeftHandThumb2", "LeftHandThumb3", "LeftHandThumbEnd", "LeftHandIndex1", "LeftHandIndex2", "LeftHandIndex3", "LeftHandIndex4", "LeftHandIndexEnd", "LeftHandMiddle1", "LeftHandMiddle2", "LeftHandMiddle3", "LeftHandMiddle4", "LeftHandMiddleEnd", "LeftHandRing1", "LeftHandRing2", "LeftHandRing3", "LeftHandRing4", "LeftHandRingEnd", "LeftHandPinky1", "LeftHandPinky2", "LeftHandPinky3", "LeftHandPinky4", "LeftHandPinkyEnd", ] # in order of chain hip_joint_names = ["RightLeg", "LeftLeg"] # in order [right, left] bone_order_names_with_parents = [ ("Hips", None), ("Spine1", "Hips"), ("Spine2", "Spine1"), ("Chest", "Spine2"), ("Neck1", "Chest"), ("Neck2", "Neck1"), ("Head", "Neck2"), ("HeadEnd", "Head"), ("Jaw", "Head"), ("LeftEye", "Head"), ("RightEye", "Head"), ("LeftShoulder", "Chest"), ("LeftArm", "LeftShoulder"), ("LeftForeArm", "LeftArm"), ("LeftHand", "LeftForeArm"), ("LeftHandThumb1", "LeftHand"), ("LeftHandThumb2", "LeftHandThumb1"), ("LeftHandThumb3", "LeftHandThumb2"), ("LeftHandThumbEnd", "LeftHandThumb3"), ("LeftHandIndex1", "LeftHand"), ("LeftHandIndex2", 
"LeftHandIndex1"), ("LeftHandIndex3", "LeftHandIndex2"), ("LeftHandIndex4", "LeftHandIndex3"), ("LeftHandIndexEnd", "LeftHandIndex4"), ("LeftHandMiddle1", "LeftHand"), ("LeftHandMiddle2", "LeftHandMiddle1"), ("LeftHandMiddle3", "LeftHandMiddle2"), ("LeftHandMiddle4", "LeftHandMiddle3"), ("LeftHandMiddleEnd", "LeftHandMiddle4"), ("LeftHandRing1", "LeftHand"), ("LeftHandRing2", "LeftHandRing1"), ("LeftHandRing3", "LeftHandRing2"), ("LeftHandRing4", "LeftHandRing3"), ("LeftHandRingEnd", "LeftHandRing4"), ("LeftHandPinky1", "LeftHand"), ("LeftHandPinky2", "LeftHandPinky1"), ("LeftHandPinky3", "LeftHandPinky2"), ("LeftHandPinky4", "LeftHandPinky3"), ("LeftHandPinkyEnd", "LeftHandPinky4"), ("RightShoulder", "Chest"), ("RightArm", "RightShoulder"), ("RightForeArm", "RightArm"), ("RightHand", "RightForeArm"), ("RightHandThumb1", "RightHand"), ("RightHandThumb2", "RightHandThumb1"), ("RightHandThumb3", "RightHandThumb2"), ("RightHandThumbEnd", "RightHandThumb3"), ("RightHandIndex1", "RightHand"), ("RightHandIndex2", "RightHandIndex1"), ("RightHandIndex3", "RightHandIndex2"), ("RightHandIndex4", "RightHandIndex3"), ("RightHandIndexEnd", "RightHandIndex4"), ("RightHandMiddle1", "RightHand"), ("RightHandMiddle2", "RightHandMiddle1"), ("RightHandMiddle3", "RightHandMiddle2"), ("RightHandMiddle4", "RightHandMiddle3"), ("RightHandMiddleEnd", "RightHandMiddle4"), ("RightHandRing1", "RightHand"), ("RightHandRing2", "RightHandRing1"), ("RightHandRing3", "RightHandRing2"), ("RightHandRing4", "RightHandRing3"), ("RightHandRingEnd", "RightHandRing4"), ("RightHandPinky1", "RightHand"), ("RightHandPinky2", "RightHandPinky1"), ("RightHandPinky3", "RightHandPinky2"), ("RightHandPinky4", "RightHandPinky3"), ("RightHandPinkyEnd", "RightHandPinky4"), ("LeftLeg", "Hips"), ("LeftShin", "LeftLeg"), ("LeftFoot", "LeftShin"), ("LeftToeBase", "LeftFoot"), ("LeftToeEnd", "LeftToeBase"), ("RightLeg", "Hips"), ("RightShin", "RightLeg"), ("RightFoot", "RightShin"), ("RightToeBase", "RightFoot"), ("RightToeEnd", "RightToeBase"), ] @property def relaxed_hands_rest_pose(self): # lazy loading if hasattr(self, "_relaxed_hands_rest_pose"): return self._relaxed_hands_rest_pose relaxed_hands_pose_path = Path(self.folder) / "relaxed_hands_rest_pose.npy" relaxed_hands_rest_pose = torch.from_numpy(np.load(relaxed_hands_pose_path)).squeeze() self.register_buffer( "_relaxed_hands_rest_pose", relaxed_hands_rest_pose, persistent=False, ) return self._relaxed_hands_rest_pose class SOMASkeleton30(SkeletonBase): """Compact 30-joint SOMA variant with reduced hand and end-effector detail.""" name = "somaskel30" right_foot_joint_names = [ "RightFoot", "RightToeBase", ] # in order of chain left_foot_joint_names = [ "LeftFoot", "LeftToeBase", ] # in order of chain right_hand_joint_names = [ "RightHand", "RightHandMiddleEnd", ] # in order of chain left_hand_joint_names = [ "LeftHand", "LeftHandMiddleEnd", ] # in order of chain hip_joint_names = ["RightLeg", "LeftLeg"] # in order [right, left] bone_order_names_with_parents = [ ("Hips", None), ("Spine1", "Hips"), ("Spine2", "Spine1"), ("Chest", "Spine2"), ("Neck1", "Chest"), ("Neck2", "Neck1"), ("Head", "Neck2"), ("Jaw", "Head"), ("LeftEye", "Head"), ("RightEye", "Head"), ("LeftShoulder", "Chest"), ("LeftArm", "LeftShoulder"), ("LeftForeArm", "LeftArm"), ("LeftHand", "LeftForeArm"), ("LeftHandThumbEnd", "LeftHand"), ("LeftHandMiddleEnd", "LeftHand"), ("RightShoulder", "Chest"), ("RightArm", "RightShoulder"), ("RightForeArm", "RightArm"), ("RightHand", "RightForeArm"), ("RightHandThumbEnd", 
"RightHand"), ("RightHandMiddleEnd", "RightHand"), ("LeftLeg", "Hips"), ("LeftShin", "LeftLeg"), ("LeftFoot", "LeftShin"), ("LeftToeBase", "LeftFoot"), ("RightLeg", "Hips"), ("RightShin", "RightLeg"), ("RightFoot", "RightShin"), ("RightToeBase", "RightFoot"), ] @property def somaskel77(self): # lazy loading if not hasattr(self, "_somaskel77"): self._somaskel77 = SOMASkeleton77() return self._somaskel77 @ensure_batched(local_joint_rots_subset=4) def to_SOMASkeleton77(self, local_joint_rots_subset: torch.Tensor): # Converting from 30-joint to 77-joint to have relaxed hands device = local_joint_rots_subset.device nF = len(local_joint_rots_subset) local_joint_rots_mats = self.somaskel77.relaxed_hands_rest_pose.clone().to(device).repeat(nF, 1, 1, 1) skel_slice = self.get_skel_slice(self.somaskel77) local_joint_rots_mats[:, skel_slice] = local_joint_rots_subset return local_joint_rots_mats @ensure_batched(local_joint_rots_full=4) # [BT, J, 3, 3] def from_SOMASkeleton77(self, local_joint_rots_full: torch.Tensor) -> torch.Tensor: """Extract the 30-joint subset from 77-joint local rotation data.""" skel_slice = self.get_skel_slice(self.somaskel77) return local_joint_rots_full[:, skel_slice] def output_to_SOMASkeleton77(self, output: dict) -> dict: """Convert model output dict from somaskel30 to somaskel77. Expands local_rot_mats to 77 joints, re-runs FK for global_rot_mats and posed_joints. Foot contacts are expanded from 4 channels to 6 (toe-end copies toe-base contact). """ local_rot_mats_77 = self.to_SOMASkeleton77(output["local_rot_mats"]) root_positions = output["root_positions"] global_rot_mats_77, posed_joints_77, _ = self.somaskel77.fk(local_rot_mats_77, root_positions) out_77 = dict(output) out_77["local_rot_mats"] = local_rot_mats_77 out_77["global_rot_mats"] = global_rot_mats_77 out_77["posed_joints"] = posed_joints_77 if "foot_contacts" in output: fc = output["foot_contacts"] # [..., 4]: [L_heel, L_toe, R_heel, R_toe] # -> [..., 6]: [L_heel, L_toe, L_toe_end, R_heel, R_toe, R_toe_end] out_77["foot_contacts"] = torch.cat([fc[..., :2], fc[..., 1:2], fc[..., 2:4], fc[..., 3:4]], dim=-1) return out_77 class G1Skeleton34(SkeletonBase): """Unitree G1 skeleton with 32 articulated joints plus 2 toe endpoints.""" name = "g1skel34" right_foot_joint_names = ["right_ankle_roll_skel", "right_toe_base"] left_foot_joint_names = ["left_ankle_roll_skel", "left_toe_base"] right_hand_joint_names = ["right_wrist_yaw_skel", "right_hand_roll_skel"] left_hand_joint_names = ["left_wrist_yaw_skel", "left_hand_roll_skel"] hip_joint_names = [ "right_hip_pitch_skel", "left_hip_pitch_skel", ] # used to calculate root orientation, only need 1 pair of hip joints bone_order_names_with_parents = [ ("pelvis_skel", None), ("left_hip_pitch_skel", "pelvis_skel"), ("left_hip_roll_skel", "left_hip_pitch_skel"), ("left_hip_yaw_skel", "left_hip_roll_skel"), ("left_knee_skel", "left_hip_yaw_skel"), ("left_ankle_pitch_skel", "left_knee_skel"), ("left_ankle_roll_skel", "left_ankle_pitch_skel"), ("left_toe_base", "left_ankle_roll_skel"), ("right_hip_pitch_skel", "pelvis_skel"), ("right_hip_roll_skel", "right_hip_pitch_skel"), ("right_hip_yaw_skel", "right_hip_roll_skel"), ("right_knee_skel", "right_hip_yaw_skel"), ("right_ankle_pitch_skel", "right_knee_skel"), ("right_ankle_roll_skel", "right_ankle_pitch_skel"), ("right_toe_base", "right_ankle_roll_skel"), ("waist_yaw_skel", "pelvis_skel"), ("waist_roll_skel", "waist_yaw_skel"), ("waist_pitch_skel", "waist_roll_skel"), ("left_shoulder_pitch_skel", "waist_pitch_skel"), 
("left_shoulder_roll_skel", "left_shoulder_pitch_skel"), ("left_shoulder_yaw_skel", "left_shoulder_roll_skel"), ("left_elbow_skel", "left_shoulder_yaw_skel"), ("left_wrist_roll_skel", "left_elbow_skel"), ("left_wrist_pitch_skel", "left_wrist_roll_skel"), ("left_wrist_yaw_skel", "left_wrist_pitch_skel"), ("left_hand_roll_skel", "left_wrist_yaw_skel"), ("right_shoulder_pitch_skel", "waist_pitch_skel"), ("right_shoulder_roll_skel", "right_shoulder_pitch_skel"), ("right_shoulder_yaw_skel", "right_shoulder_roll_skel"), ("right_elbow_skel", "right_shoulder_yaw_skel"), ("right_wrist_roll_skel", "right_elbow_skel"), ("right_wrist_pitch_skel", "right_wrist_roll_skel"), ("right_wrist_yaw_skel", "right_wrist_pitch_skel"), ("right_hand_roll_skel", "right_wrist_yaw_skel"), ] class SMPLXSkeleton22(SkeletonBase): """SMPL-X skeleton with body-only 22 joints.""" name = "smplx22" right_foot_joint_names = ["right_ankle", "right_foot"] # in order of chain left_foot_joint_names = ["left_ankle", "left_foot"] # in order of chain right_hand_joint_names = ["right_wrist"] # in order of chain left_hand_joint_names = ["left_wrist"] # in order of chain hip_joint_names = ["right_hip", "left_hip"] # in order [right, left] bone_order_names_with_parents = [ ("pelvis", None), ("left_hip", "pelvis"), ("right_hip", "pelvis"), ("spine1", "pelvis"), ("left_knee", "left_hip"), ("right_knee", "right_hip"), ("spine2", "spine1"), ("left_ankle", "left_knee"), ("right_ankle", "right_knee"), ("spine3", "spine2"), ("left_foot", "left_ankle"), ("right_foot", "right_ankle"), ("neck", "spine3"), ("left_collar", "spine3"), ("right_collar", "spine3"), ("head", "neck"), ("left_shoulder", "left_collar"), ("right_shoulder", "right_collar"), ("left_elbow", "left_shoulder"), ("right_elbow", "right_shoulder"), ("left_wrist", "left_elbow"), ("right_wrist", "right_elbow"), ] ================================================ FILE: kimodo/skeleton/kinematics.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Forward-kinematics primitives for articulated skeletons.""" from typing import List import einops import torch import torch.nn.functional as F from ..tools import ensure_batched @ensure_batched(local_joint_rots=4, root_positions=2) def fk( local_joint_rots: torch.Tensor, root_positions: torch.Tensor, skeleton, root_positions_is_global: bool = True, ): """Compute global joint rotations and positions from local rotations. Args: local_joint_rots: Local rotation matrices with shape `(..., J, 3, 3)`. root_positions: Root translations with shape `(..., 3)`. skeleton: Skeleton object exposing `neutral_joints`, `joint_parents`, and `root_idx`. root_positions_is_global: If `True`, neutral joints are recentered so root translations are interpreted in world space. Returns: Tuple `(global_joint_rots, posed_joints, posed_joints_norootpos)`. """ device = local_joint_rots.device dtype = local_joint_rots.dtype # If skeleton has baked rest (e.g. from XML), identity local = baked rest pose. # So training/inference local rotations are in reference to XML rest *orientations*. rest_local = getattr(skeleton, "rest_local_rots", None) if rest_local is not None: rest_local = rest_local.to(device=device, dtype=dtype) local_joint_rots = torch.einsum("jmn,...jno->...jmo", rest_local, local_joint_rots) # Rest positions for FK. 
    # Must be consistent with rest_local: when local = identity,
    # FK(rest_local, neutral_joints) should equal the XML rest pose positions. So
    # neutral_joints are not necessarily the raw XML joint positions; they are the
    # rest layout that, when rotated by rest_local, yields the XML rest positions.
    neutral_joints = skeleton.neutral_joints.to(device=device, dtype=dtype)

    if root_positions_is_global is True:
        # Removing the pelvis offset from the neutral joints,
        # as the root positions do not depend on the pelvis offset of the skeleton
        pelvis_offset = neutral_joints[skeleton.root_idx]
        neutral_joints = neutral_joints - pelvis_offset

    # compute joint position and global rotations
    joints = einops.repeat(
        neutral_joints,
        "j k -> b j k",
        b=len(local_joint_rots),
    )
    posed_joints_norootpos, global_joint_rots = batch_rigid_transform(
        local_joint_rots,
        joints,
        skeleton.joint_parents,
        skeleton.root_idx,
    )

    # if root_positions_is_global is True:
    # posed_joints_norootpos always starts at zero,
    # otherwise it could start with the pelvis offset
    posed_joints = posed_joints_norootpos + root_positions[:, None]

    return global_joint_rots, posed_joints, posed_joints_norootpos


def compute_idx_levels(parents):
    """Group joint indices by hierarchy depth for level-wise FK updates.

    Args:
        parents: Parent index tensor of shape `(J,)` with root parent `-1`.

    Returns:
        List of index tensors, where each tensor contains joints at one depth.
    """
    idx_levs = [[]]
    lev_dicts = {0: -1}
    for i in range(1, parents.shape[0]):
        assert int(parents[i]) in lev_dicts
        lev = lev_dicts[int(parents[i])] + 1
        if lev + 1 > len(idx_levs):
            idx_levs.append([])
        idx_levs[lev].append(int(i))
        lev_dicts[int(i)] = lev
    idx_levs = [torch.tensor(x).long() for x in idx_levs]
    return idx_levs


def batch_rigid_transform(rot_mats, joints, parents, root_idx):
    """Perform batch rigid transformation on a skeletal structure.

    Args:
        rot_mats: Local rotation matrices for each joint: (B, J, 3, 3)
        joints: Initial joint positions: (B, J, 3)
        parents: Tensor indicating the parent of each joint: (J,)
        root_idx (int): index of the root

    Returns:
        Posed joint positions and global rotation matrices after applying
        forward kinematics.
    """
    # Compute the hierarchical levels of joints based on their parent relationships
    idx_levs = compute_idx_levels(parents)

    # Apply forward kinematics to transform the joints
    return forward_kinematics(rot_mats, joints, parents, idx_levs, root_idx)


@torch.jit.script
def transform_mat(R, t):
    """Creates a batch of transformation matrices.

    Args:
        - R: Bx3x3 array of a batch of rotation matrices
        - t: Bx3x1 array of a batch of translation vectors

    Returns:
        - T: Bx4x4 Transformation matrix
    """
    # No padding left or right, only add an extra row
    return torch.cat([F.pad(R, [0, 0, 0, 1]), F.pad(t, [0, 0, 0, 1], value=1.0)], dim=2)


@torch.jit.script
def forward_kinematics(
    rot_mats,
    joints,
    parents: torch.Tensor,
    idx_levs: List[torch.Tensor],
    root_idx: int,
):
    """Perform forward kinematics to compute posed joints and global rotation matrices.

    Args:
        rot_mats: Local rotation matrices for each joint: (B, J, 3, 3)
        joints: Initial joint positions: (B, J, 3)
        parents: Tensor indicating the parent of each joint: (J,)
        idx_levs: Tensors of joint indices grouped by depth in the kinematic tree.
root_idx (int): index of the root Returns: Posed joints: (B, J, 3) Global rotation matrices: (B, J, 3, 3) """ # Add an extra dimension to joints joints = torch.unsqueeze(joints, dim=-1) # Compute relative joint positions rel_joints = joints.clone() mask_no_root = torch.ones(joints.shape[1], dtype=torch.bool) mask_no_root[root_idx] = False rel_joints[:, mask_no_root] -= joints[:, parents[mask_no_root]].clone() # Compute initial transformation matrices # (B, J + 1, 4, 4) transforms_mat = transform_mat(rot_mats.reshape(-1, 3, 3), rel_joints.reshape(-1, 3, 1)).reshape( -1, joints.shape[1], 4, 4 ) # Initialize the root transformation matrices transforms = torch.zeros_like(transforms_mat) transforms[:, root_idx] = transforms_mat[:, root_idx] # Compute global transformations level by level for indices in idx_levs: curr_res = torch.matmul(transforms[:, parents[indices]], transforms_mat[:, indices]) transforms[:, indices] = curr_res # Extract posed joint positions from the transformation matrices posed_joints = transforms[:, :, :3, 3] # Extract global rotation matrices from the transformation matrices global_rot_mat = transforms[:, :, :3, :3] return posed_joints, global_rot_mat ================================================ FILE: kimodo/skeleton/registry.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Factory helpers for building predefined skeleton variants.""" from pathlib import Path from kimodo.assets import SKELETONS_ROOT from .definitions import ( G1Skeleton34, SMPLXSkeleton22, SOMASkeleton30, SOMASkeleton77, ) def build_skeleton(nbjoints: int, assets_folder: str | Path = SKELETONS_ROOT): """Instantiate a known skeleton class from its joint count. Supported joint counts: 30 (SOMA compact), 34 (G1), 77 (SOMA full), 22 (SMPLX). Args: nbjoints: Number of joints expected in the skeleton representation. assets_folder: Base skeleton-assets directory containing per-skeleton subfolders. Returns: A configured `SkeletonBase` subclass instance. Raises: ValueError: If `nbjoints` does not match a registered skeleton. """ assets_folder = Path(assets_folder) if nbjoints == 34: return G1Skeleton34(assets_folder / "g1skel34") elif nbjoints == 22: return SMPLXSkeleton22(assets_folder / "smplx22") elif nbjoints == 30: return SOMASkeleton30(assets_folder / "somaskel30") elif nbjoints == 77: return SOMASkeleton77(assets_folder / "somaskel77") else: raise ValueError("This skeleton is not recognized.") ================================================ FILE: kimodo/skeleton/transforms.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Rotation-space conversion utilities for skeleton motion data.""" import einops import torch from ..tools import ensure_batched from .kinematics import batch_rigid_transform def global_rots_to_local_rots(global_joint_rots: torch.Tensor, skeleton): """Convert global rotations to local rotations using a skeleton hierarchy. Args: global_joint_rots: Global rotation matrices with shape `(..., J, 3, 3)`. skeleton: Skeleton object exposing `joint_parents` and `root_idx`. Returns: Local rotation matrices with the same leading shape as the input. 
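
    Example:
        A round-trip sketch; `skeleton`, `local_rots`, and `root_pos` are
        illustrative names, and the identity only holds for skeletons without
        baked rest rotations::

            global_rots, _, _ = skeleton.fk(local_rots, root_pos)
            local_again = global_rots_to_local_rots(global_rots, skeleton)
            # local_again matches local_rots up to numerical precision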
""" # Doing big batch global_joint_mats, ps = einops.pack( [global_joint_rots], "* nbjoints dim1 dim2", ) # obtain back the local rotations from the new global rotations parent_rot_mats = global_joint_mats[:, skeleton.joint_parents] parent_rot_mats[:, skeleton.root_idx] = torch.eye(3) # the root joint parent_rot_mats_inv = parent_rot_mats.transpose(2, 3) local_rot_mats = torch.einsum( "T N m n, T N n o -> T N m o", parent_rot_mats_inv, global_joint_mats, ) [local_rot_mats] = einops.unpack(local_rot_mats, ps, "* nbjoints dim1 dim2") return local_rot_mats @ensure_batched(local_rot_mats=4) def change_tpose(local_rot_mats: torch.Tensor, global_rot_offsets: torch.Tensor, skeleton): """Re-express local rotations in another t_pose based on the global rotation offsets. Args: local_rot_mats: Local rotation matrices with shape `(..., J, 3, 3)`. global_rot_offsets: Global rotation offsets with shape `(..., J, 3, 3)`. skeleton: Skeleton object exposing `joint_parents`, `root_idx`, and `nbjoints`. Returns: Tuple `(new_local_rot_mats, new_global_rot_mats)` in the standard frame. """ device, dtype = local_rot_mats.device, local_rot_mats.dtype global_rot_offsets = global_rot_offsets.to(device=device, dtype=dtype) root_idx = skeleton.root_idx joint_parents = skeleton.joint_parents # These are dummy joint positions, will not be used neutral_joints = torch.ones((len(local_rot_mats), skeleton.nbjoints, 3), device=device, dtype=dtype) # get the old joint rotations in the same global space as the t-pose # Note: the neutral joints we use here doesn't matter, because we are only using the global rotation outputs _, global_rot_mats = batch_rigid_transform(local_rot_mats, neutral_joints, joint_parents, root_idx) # (T, N, 3, 3) # compute the desired joint rotations in the frame of the new t-pose new_global_rot_mats = torch.einsum("T N m n, N o n -> T N m o", global_rot_mats, global_rot_offsets) # convert back to local rotations new_local_rot_mats = global_rots_to_local_rots(new_global_rot_mats, skeleton) return new_local_rot_mats, new_global_rot_mats @ensure_batched(local_rot_mats=4) def to_standard_tpose(local_rot_mats: torch.Tensor, skeleton): """Re-express local rotations in the skeleton's standard T-pose convention. Args: local_rot_mats: Local rotation matrices with shape `(..., J, 3, 3)`. skeleton: Skeleton object exposing `global_rot_offsets`, `joint_parents`, `root_idx`, and `nbjoints`. Returns: Tuple `(new_local_rot_mats, new_global_rot_mats)` in the standard frame. """ global_rot_offsets = skeleton.global_rot_offsets return change_tpose(local_rot_mats, global_rot_offsets, skeleton) @ensure_batched(local_rot_mats=4) def from_standard_tpose(local_rot_mats: torch.Tensor, skeleton): """Re-express local rotations from the skeleton's standard T-pose convention to the original formulation. Args: local_rot_mats: Local rotation matrices with shape `(..., J, 3, 3)`. skeleton: Skeleton object exposing `global_rot_offsets`, `joint_parents`, `root_idx`, and `nbjoints`. Returns: Tuple `(new_local_rot_mats, new_global_rot_mats)` in the standard frame. """ global_rot_offsets = skeleton.global_rot_offsets global_rot_offsets_T = global_rot_offsets.mT # do the inverse transform return change_tpose(local_rot_mats, global_rot_offsets_T, skeleton) ================================================ FILE: kimodo/tools.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0

"""Shared utilities: validation decorator, batching, JSON I/O, seeding, tensor conversion."""

import inspect
import json
import random
from collections.abc import Mapping
from functools import wraps
from math import prod
from pathlib import Path
from typing import Any, Callable, ParamSpec, TypeVar, Union

import numpy as np
import torch


def validate(validator, save_args: bool = False, super_init: bool = False):
    """Create a decorator function for validating user inputs.

    Args:
        validator: pydantic dataclass used to validate the keyword arguments
        save_args (bool): save all validated attributes to the obj [args[0]]
        super_init (bool): init parent with no arguments (useful for using save_args on a nn.Module)

    Returns:
        decorator: the decorator function
    """

    def decorator(func):
        @wraps(func)
        def validated_func(*args, **kwargs):
            conf = validator(**kwargs)
            if save_args:
                assert len(args) != 0
                obj = args[0]
                if super_init:
                    # init the parent module
                    super(type(obj), obj).__init__()
                for key, val in conf.__dict__.items():
                    setattr(obj, key, val)
            return func(*args, conf)

        return validated_func

    return decorator


# Type alias for clarity
Tensor = Any
P = ParamSpec("P")
R = TypeVar("R")


def ensure_batched(**spec: int) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """Decorator to flatten complex batch dimensions.

    Fixes included:
    1. Handles 1D tensors (tail_ndim=0) correctly without slicing errors.
    2. Skips .reshape() if the input is already purely flat (Optimization).
    """
    if not spec:
        raise ValueError("At least one argument spec must be provided.")

    def decorator(fn: Callable[P, R]) -> Callable[P, R]:
        sig = inspect.signature(fn)

        @wraps(fn)
        def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
            bound = sig.bind(*args, **kwargs)
            bound.apply_defaults()

            def _sequence_shape(name: str, value: Any) -> tuple[int, ...]:
                if not isinstance(value, (list, tuple)):
                    return ()
                if len(value) == 0:
                    return (0,)
                first_shape = _sequence_shape(name, value[0])
                for item in value[1:]:
                    item_shape = _sequence_shape(name, item)
                    if item_shape != first_shape:
                        raise ValueError(f"'{name}' must be a rectangular nested sequence, got ragged shape.")
                return (len(value), *first_shape)

            def _shape_and_ndim(name: str, value: Any) -> tuple[tuple[int, ...], int]:
                if hasattr(value, "shape") and hasattr(value, "ndim"):
                    shape = tuple(value.shape)
                    return shape, int(value.ndim)
                if isinstance(value, (list, tuple)):
                    shape = _sequence_shape(name, value)
                    return shape, len(shape)
                raise TypeError(f"'{name}' must be tensor-like or a nested list/tuple, got {type(value)}.")

            def _reshape_like(value: Any, shape: tuple[int, ...], name: str) -> Any:
                if hasattr(value, "reshape"):
                    return value.reshape(*shape)
                if not isinstance(value, (list, tuple)):
                    raise TypeError(f"Cannot reshape '{name}' of type {type(value)}.")
                flat: list[Any] = []

                def _flatten(x: Any) -> None:
                    if isinstance(x, (list, tuple)):
                        for item in x:
                            _flatten(item)
                    else:
                        flat.append(x)

                _flatten(value)
                expected_size = prod(shape) if shape else 1
                if len(flat) != expected_size:
                    raise ValueError(f"Cannot reshape '{name}' with {len(flat)} elements into shape {shape}.")

                def _build(index: int, dims: tuple[int, ...]) -> tuple[Any, int]:
                    if not dims:
                        return flat[index], index + 1
                    items = []
                    for _ in range(dims[0]):
                        item, index = _build(index, dims[1:])
                        items.append(item)
                    return items, index

                rebuilt, used = _build(0, shape)
                if used != len(flat):
                    raise ValueError(f"Internal reshape error for '{name}': used {used}/{len(flat)} elements.")
                if isinstance(value, tuple) and
isinstance(rebuilt, list): return tuple(rebuilt) return rebuilt # --- 1. CANONICAL ARGUMENT --- spec_items = list(spec.items()) canonical_name = None canonical_ndim = None x0 = None for name, ndim in spec_items: candidate = bound.arguments.get(name, None) if candidate is not None: canonical_name = name canonical_ndim = ndim x0 = candidate break if canonical_name is None: raise ValueError( "All canonical candidates are None: " + ", ".join(f"'{name}'" for name, _ in spec_items) ) # Calculate split between Batch dims and Feature dims expected_tail_dims = canonical_ndim - 1 # e.g. 3 - 1 = 2 (Sequence, Feat) x0_shape, x0_ndim = _shape_and_ndim(canonical_name, x0) # Validation if x0_ndim < expected_tail_dims: raise ValueError(f"'{canonical_name}' ndim={x0_ndim} < expected {expected_tail_dims} tail dims.") # --- LOGIC FIX 1: Handle 0 tail dims correctly --- if expected_tail_dims == 0: orig_batch_shape = x0_shape tail_shape = () else: orig_batch_shape = x0_shape[:-expected_tail_dims] tail_shape = x0_shape[-expected_tail_dims:] # Calculate flattened batch size # If orig_batch_shape is () (scalar input), size is 1. B_flat = prod(orig_batch_shape) if orig_batch_shape else 1 # Determine if we added a fake batch dim (unbatched input) is_unbatched_input = len(orig_batch_shape) == 0 # --- LOGIC FIX 2: Skip reshape if already flat (Optimization) --- # If batch shape is already 1D (e.g. [2]), we don't need to reshape [2, 140, 5] -> [2, 140, 5] is_already_flat = len(orig_batch_shape) == 1 if is_unbatched_input: # (H, W) -> (1, H, W) x0_batched = _reshape_like(x0, (1, *tail_shape), canonical_name) elif is_already_flat: # (B, H, W) -> Keep as is x0_batched = x0 else: # (B1, B2, H, W) -> (B1*B2, H, W) x0_batched = _reshape_like(x0, (B_flat, *tail_shape), canonical_name) bound.arguments[canonical_name] = x0_batched # --- 2. OTHER ARGUMENTS --- for name, target_ndim in spec_items: if name == canonical_name: continue val = bound.arguments.get(name, None) if val is None: continue arg_tail_dims = target_ndim - 1 # e.g. for lengths=1, tail=0 val_shape, val_ndim = _shape_and_ndim(name, val) # Validate if val_ndim < arg_tail_dims: raise ValueError(f"'{name}' ndim={val_ndim} too small.") # --- Get Batch Shape (With 0-tail fix) --- if arg_tail_dims == 0: val_batch_shape = val_shape val_tail_shape = () else: val_batch_shape = val_shape[:-arg_tail_dims] val_tail_shape = val_shape[-arg_tail_dims:] # --- Check Mismatch --- # Unbatched inputs must match unbatched canonical if len(val_batch_shape) == 0: if not is_unbatched_input: raise ValueError(f"'{name}' is unbatched but canonical is batched.") val_batched = _reshape_like(val, (1, *val_tail_shape), name) else: # Batched inputs must match canonical batch shape EXACTLY if val_batch_shape != orig_batch_shape: raise ValueError( f"Batch dimensions mismatch! '{canonical_name}' has {orig_batch_shape}, " f"but '{name}' has {val_batch_shape}." ) # Optimization: Don't reshape if already flat if is_already_flat: val_batched = val else: val_batched = _reshape_like(val, (B_flat, *val_tail_shape), name) bound.arguments[name] = val_batched # --- 3. EXECUTION --- out = fn(**bound.arguments) # --- 4. RESTORE --- def restore(obj): if isinstance(obj, Mapping): return {k: restore(v) for k, v in obj.items()} if isinstance(obj, (list, tuple)): return type(obj)(restore(x) for x in obj) if hasattr(obj, "shape"): if obj.ndim == 0: return obj # Verify batch dimension exists and wasn't reduced if obj.shape[0] != B_flat: return obj # If input was simple (B, ...), return simple (B, ...) 
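                    # The output still carries the flattened batch dim B_flat here;
                    # the branches below either return it as-is (input batch was
                    # already flat) or reshape back to the original batch shape,
                    # dropping the fake batch dim for unbatched inputs.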
if is_already_flat: return obj rest = obj.shape[1:] if is_unbatched_input: assert obj.shape[0] == 1, "The batch size should be 1 for unbatched." return obj[0] return obj.reshape(*orig_batch_shape, *rest) return obj return restore(out) return wrapper return decorator def to_numpy(obj): """Recursively convert tensors in dicts/lists/tuples to numpy arrays; leave other types unchanged.""" if isinstance(obj, Mapping): return {k: to_numpy(v) for k, v in obj.items()} if isinstance(obj, (list, tuple)): return type(obj)(to_numpy(x) for x in obj) if isinstance(obj, torch.Tensor): return obj.cpu().numpy() return obj def to_torch(obj, device=None, dtype=None): """Recursively convert numpy arrays in dicts/lists/tuples to torch tensors; optionally move to device/dtype.""" if isinstance(obj, Mapping): return {k: to_torch(v, device, dtype) for k, v in obj.items()} if isinstance(obj, (list, tuple)): return type(obj)(to_torch(x, device, dtype) for x in obj) if isinstance(obj, np.ndarray): obj = torch.from_numpy(obj) if isinstance(obj, torch.Tensor): if dtype is not None: obj = obj.to(dtype=dtype) if device is None: return obj return obj.to(device) return obj def seed_everything(seed: int, deterministic: bool = False) -> None: """Seed all random number generators.""" random.seed(seed) # for Python random module. np.random.seed(seed) # for NumPy. torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) if deterministic: torch.backends.cudnn.deterministic = True # for deterministic behavior. torch.backends.cudnn.benchmark = False # if you want to make the behavior deterministic. def load_json(path: Union[str, Path]) -> Any: """Load a JSON file and return its contents. Args: path (str | Path): Path to the JSON file. Returns: Any: Parsed JSON content (dict, list, etc.). Raises: FileNotFoundError: If the file does not exist. ValueError: If the file is not valid JSON. """ path = Path(path) if not path.exists(): raise FileNotFoundError(f"JSON file not found: {path}") try: with path.open("r", encoding="utf-8") as f: return json.load(f) except json.JSONDecodeError as e: raise ValueError(f"Invalid JSON in file {path}: {e}") from e def save_json(path: Union[str, Path], data: Any) -> None: """Save data to a JSON file. Args: path (str | Path): Path to the JSON file. data (Any): Data to save (must be JSON serializable). Raises: ValueError: If the data is not JSON serializable. """ path = Path(path) # Create parent directories if they don't exist path.parent.mkdir(parents=True, exist_ok=True) try: with path.open("w", encoding="utf-8") as f: json.dump(data, f, indent=2, ensure_ascii=False) except (TypeError, ValueError) as e: raise ValueError(f"Data is not JSON serializable: {e}") from e ================================================ FILE: kimodo/viz/__init__.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Viser-based 3D visualization for skeletons and motion.""" from . 
import viser_utils from .viser_utils import ( Character, CharacterMotion, ConstraintSet, EEJointsKeyframeSet, FullbodyKeyframeSet, GuiElements, RootKeyframe2DSet, SkeletonMesh, WaypointMesh, load_example_cases, ) __all__ = [ "Character", "CharacterMotion", "ConstraintSet", "EEJointsKeyframeSet", "FullbodyKeyframeSet", "GuiElements", "RootKeyframe2DSet", "SkeletonMesh", "WaypointMesh", "load_example_cases", "viser_utils", ] ================================================ FILE: kimodo/viz/constraint_ui.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Constraint visualization and frame indexing for the viz UI.""" from typing import List, Optional import numpy as np import torch import viser import viser.transforms as tf from kimodo.motion_rep.smooth_root import get_smooth_root_pos from kimodo.skeleton import SkeletonBase from kimodo.tools import to_numpy, to_torch from .scene import SkeletonMesh, WaypointMesh def update_interval(interval_start, interval_end, start_frame_idx, end_frame_idx): """Updates an interval after removing the range from start_frame_idx to end_frame_idx.""" # Calculate new range after removing [start_frame_idx, end_frame_idx] # Case 1: Removal fully contains the interval -> delete entirely if start_frame_idx <= interval_start and end_frame_idx >= interval_end: return None, None # Already removed, don't recreate # Case 2: Removal is at the start of interval -> shrink from start elif start_frame_idx <= interval_start and end_frame_idx < interval_end: new_start = end_frame_idx + 1 new_end = interval_end # Case 3: Removal is at the end of interval -> shrink from end elif start_frame_idx > interval_start and end_frame_idx >= interval_end: new_start = interval_start new_end = start_frame_idx - 1 # Case 4: Removal is in the middle -> keep the larger portion else: # start_frame_idx > interval_start and end_frame_idx < interval_end left_size = start_frame_idx - interval_start right_size = interval_end - end_frame_idx if left_size >= right_size: new_start = interval_start new_end = start_frame_idx - 1 else: new_start = end_frame_idx + 1 new_end = interval_end return new_start, new_end class ConstraintSet: def __init__( self, name: str, server: viser.ViserServer, skeleton: SkeletonBase, display_name: Optional[str] = None, ): self.name = name self.server = server self.skeleton = skeleton self.display_name = display_name if display_name is not None else name self.keyframes = dict() # frame_idx -> poses self.frame2keyid = dict() # frame_idx -> list of keyframe ids at this frame self.scene_elements = dict() # frame_idx -> meshes, labels, etc. self.interval_labels = dict() # (start_frame_idx, end_frame_idx) -> interval_label self.labels_visible = True def set_label_visibility(self, visible: bool) -> None: """Show or hide constraint labels without deleting them.""" self.labels_visible = visible for scene_data in self.scene_elements.values(): label = scene_data.get("label") if label is not None: label.visible = visible for interval_label in self.interval_labels.values(): interval_label.visible = visible def set_overlay_visibility(self, only_frame: Optional[int] = None) -> None: """Show all overlay elements, or only those at the given frame. Args: only_frame: If None, show all overlays. If int, show only overlays at that frame. 
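
        Example:
            A typical call from UI code; `constraint_set` and `frame_slider`
            are illustrative names::

                constraint_set.set_overlay_visibility(only_frame=frame_slider.value)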
""" raise NotImplementedError("Subclasses must implement this method") def add_keyframe(self, keyframe_id: str, frame_idx: int, pose_data: torch.Tensor): """Adds a single keyframe at the given frame with the given pose data. Args: keyframe_id: str, id for the keyframe. Must be unique within the given frame_idx. frame_idx: int, frame index to add the keyframe at pose_data: torch.Tensor, e.g. full-body pose, EE pose, 2D root pose, etc. """ raise NotImplementedError("Subclasses must implement this method") def add_interval( self, interval_id: str, start_frame_idx: int, end_frame_idx: int, pose_seq_data: torch.Tensor, ): """Adds a keyframe interval between the given start and end frames with the given pose data. Args: interval_id: str, id for the interval. Must be unique within the given start_frame_idx and end_frame_idx. start_frame_idx: int, start frame index of the interval end_frame_idx: int, end frame index of the interval pose_seq_data: torch.Tensor, data for constrained interval, e.g. full-body poses, EE poses, 2D root poses, etc. """ raise NotImplementedError("Subclasses must implement this method") def _add_interval_label(self, start_frame_idx: int, end_frame_idx: int): """ Adds an interval label between the given start and end frames Args: start_frame_idx: int, start frame index of the interval end_frame_idx: int, end frame index of the interval """ mid = int((start_frame_idx + end_frame_idx) / 2) interval_label_pos = self._get_label_pos(mid) interval_label = self.server.scene.add_label( name=f"/{self.name}/interval_label_{start_frame_idx}_{end_frame_idx}", text=f"{self.display_name} @ [{start_frame_idx}, {end_frame_idx}]", position=interval_label_pos, font_size_mode="screen", font_screen_scale=0.7, anchor="center-center", ) interval_label.visible = self.labels_visible self.interval_labels[(start_frame_idx, end_frame_idx)] = interval_label def remove_keyframe(self, keyframe_id: str, frame_idx: int): """ Removes a keyframe at the given frame Args: keyframe_id: str, id for the keyframe to remove frame_idx: int, frame index to remove the keyframe at """ raise NotImplementedError("Subclasses must implement this method") def remove_interval(self, interval_id: str, start_frame_idx: int, end_frame_idx: int): """ Removes an interval between the given start and end frames Args: interval_id: str, id for the interval to remove start_frame_idx: int, start frame index of the interval end_frame_idx: int, end frame index of the interval """ raise NotImplementedError("Subclasses must implement this method") def _get_label_pos(self, frame_idx: int): """ Returns the position of where to place the displayed label for the given frame index Args: frame_idx: int, frame index to get the label position for """ raise NotImplementedError("Subclasses must implement this method") def _remove_interval_and_update_label(self, interval_id: str, start_frame_idx: int, end_frame_idx: int): """ Removes an interval between the given start and end frames and updates the label Args: start_frame_idx: int, start frame index of the interval end_frame_idx: int, end frame index of the interval """ for frame_idx in range(start_frame_idx, end_frame_idx + 1): self.remove_keyframe(interval_id, frame_idx) # Update interval labels that overlap with the removed range intervals_to_update = [] for (interval_start, interval_end), label in list(self.interval_labels.items()): # Check if intervals overlap if interval_start <= end_frame_idx and interval_end >= start_frame_idx: intervals_to_update.append((interval_start, interval_end, 
                    label))

        for interval_start, interval_end, label in intervals_to_update:
            # Remove old label from scene and dict
            self.server.scene.remove_by_name(label.name)
            del self.interval_labels[(interval_start, interval_end)]

            new_start, new_end = update_interval(interval_start, interval_end, start_frame_idx, end_frame_idx)
            if new_start is None or new_end is None:
                continue

            # Create updated label with new range
            if new_start <= new_end:
                # Position label at midpoint - these keyframes are guaranteed to exist
                # since the new range is outside the removal range
                mid_frame = (new_start + new_end) // 2
                label_pos = self._get_label_pos(mid_frame)
                new_label = self.server.scene.add_label(
                    name=f"/{self.name}/interval_label_{new_start}_{new_end}",
                    text=f"{self.display_name} @ [{new_start}, {new_end}]",
                    position=label_pos,
                    font_size_mode="screen",
                    font_screen_scale=0.7,
                    anchor="center-center",
                )
                new_label.visible = self.labels_visible
                self.interval_labels[(new_start, new_end)] = new_label

    def get_constraint_info(self, device: Optional[str] = None):
        """Returns constraint information for generation (torch) or UI (numpy)."""
        raise NotImplementedError("Subclasses must implement this method")

    def get_frame_idx(self):
        """Returns all constrained frame indices in the set."""
        return list(self.keyframes.keys())

    def clear(self, frame_idx: Optional[int] = None):
        """
        Clears all keyframes and intervals from the constraint set

        Args:
            frame_idx: int, single frame index to clear if given
        """
        raise NotImplementedError("Subclasses must implement this method")


def build_constraint_set_table_markdown(constraint_list: List[ConstraintSet]):
    markdown = "| Track | Frame Num |\n"
    markdown += "|------|----------|\n"
    # List each constraint together with its sorted frame indices
    for constraint in constraint_list:
        frame_info = constraint.get_frame_idx()
        if len(frame_info) > 0:
            frame_info = ", ".join([str(frame) for frame in sorted(frame_info)])
        else:
            frame_info = "-"
        markdown += f"| {constraint.display_name} | {frame_info} |\n"
    return markdown


class FullbodyKeyframeSet(ConstraintSet):
    def __init__(
        self,
        name: str,
        server: viser.ViserServer,
        skeleton: SkeletonBase,
        display_name: Optional[str] = None,
    ):
        super().__init__(name, server, skeleton, display_name=display_name)

    def add_keyframe(
        self,
        keyframe_id: str,
        frame_idx: int,
        joints_pos: torch.Tensor | np.ndarray,
        joints_rot: torch.Tensor | np.ndarray,
        viz_label: bool = True,
        exists_ok: bool = False,
    ):
        """Adds a single full-body keyframe at the given frame or updates the existing one at this frame.

        Note if a keyframe already exists at this frame, it will be updated to the given pose.

        Args:
            keyframe_id: str, id for the keyframe. Must be unique within the given frame_idx.
frame_idx: int, frame index to add the keyframe at joints_pos: torch.Tensor, [J, 3] joints positions to add the keyframe at """ # create/update scene elements if frame_idx in self.keyframes: skeleton_mesh = self.scene_elements[frame_idx]["skeleton_mesh"] skeleton_mesh.set_pose(to_torch(joints_pos)) if viz_label and "label" in self.scene_elements[frame_idx]: label = self.scene_elements[frame_idx]["label"] label.position = to_numpy(joints_pos)[self.skeleton.root_idx] label.visible = self.labels_visible else: # create skeleton to visualize the full-body constraint skeleton_mesh = SkeletonMesh( f"/{self.name}/skeleton_{frame_idx}", self.server, self.skeleton, joint_color=(255, 235, 0), bone_color=(255, 0, 0), starting_joints_pos=to_torch(joints_pos), ) self.scene_elements[frame_idx] = { "skeleton_mesh": skeleton_mesh, } if viz_label: label = self.server.scene.add_label( name=f"/{self.name}/label_{frame_idx}", text=f"{self.display_name} @ {frame_idx}", position=to_numpy(joints_pos)[self.skeleton.root_idx], font_size_mode="screen", font_screen_scale=0.7, anchor="center-center", ) label.visible = self.labels_visible self.scene_elements[frame_idx]["label"] = label # set/update data self.keyframes[frame_idx] = { "joints_pos": to_numpy(joints_pos), "joints_rot": to_numpy(joints_rot), } if frame_idx not in self.frame2keyid: self.frame2keyid[frame_idx] = [] if keyframe_id in self.frame2keyid[frame_idx]: if not exists_ok: raise AssertionError("keyframe_id already exists in this frame!") else: self.frame2keyid[frame_idx].append(keyframe_id) def add_interval( self, interval_id: str, start_frame_idx: int, end_frame_idx: int, joints_pos: torch.Tensor, joints_rot: torch.Tensor, ): """Adds a full-body keyframe interval between the given start and end frames. Args: start_frame_idx: int, start frame index of the interval end_frame_idx: int, end frame index of the interval joints_pos: torch.Tensor, [T, J, 3] joints positions within the interval """ assert joints_pos.shape[0] == end_frame_idx - start_frame_idx + 1 for frame_idx in range(start_frame_idx, end_frame_idx + 1): rel_idx = frame_idx - start_frame_idx self.add_keyframe( interval_id, frame_idx, joints_pos[rel_idx], joints_rot[rel_idx], viz_label=False, ) # add separate interval label self._add_interval_label(start_frame_idx, end_frame_idx) def remove_keyframe(self, keyframe_id: str, frame_idx: int): if frame_idx not in self.keyframes: return if keyframe_id not in self.frame2keyid[frame_idx]: return self.frame2keyid[frame_idx].remove(keyframe_id) if len(self.frame2keyid[frame_idx]) == 0: del self.frame2keyid[frame_idx] self.clear(frame_idx) def _get_label_pos(self, frame_idx: int): return self.keyframes[frame_idx]["joints_pos"][self.skeleton.root_idx] def remove_interval(self, interval_id: str, start_frame_idx: int, end_frame_idx: int): self._remove_interval_and_update_label(interval_id, start_frame_idx, end_frame_idx) def get_constraint_info(self, device: Optional[str] = None): all_joints_pos = [] all_joints_rot = [] for v in self.keyframes.values(): joints_pos = to_torch(v["joints_pos"], device=device) joints_rot = to_torch(v["joints_rot"], device=device) if len(joints_pos.shape) == 2: all_joints_pos.append(joints_pos[None]) else: all_joints_pos.append(joints_pos) if len(joints_rot.shape) == 3: all_joints_rot.append(joints_rot[None]) else: all_joints_rot.append(joints_rot) all_joints_pos = torch.cat(all_joints_pos, dim=0) if len(all_joints_pos) > 0 else None all_joints_rot = torch.cat(all_joints_rot, dim=0) if len(all_joints_rot) > 0 else None return { 
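            # Keyframes stacked along dim 0 (K constrained frames):
            # joints_pos [K, J, 3], joints_rot [K, J, 3, 3], or None when empty.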
"frame_idx": self.get_frame_idx(), "joints_pos": all_joints_pos, "joints_rot": all_joints_rot, } def clear(self, frame_idx: Optional[int] = None): frame_idx_list = list(self.keyframes.keys()) if frame_idx is None else [frame_idx] for fidx in frame_idx_list: self.scene_elements[fidx]["skeleton_mesh"].clear() if "ee_rotation_axes" in self.scene_elements[fidx]: self.server.scene.remove_by_name(self.scene_elements[fidx]["ee_rotation_axes"].name) if "label" in self.scene_elements[fidx]: self.server.scene.remove_by_name(self.scene_elements[fidx]["label"].name) self.keyframes.pop(fidx) self.scene_elements.pop(fidx) self.frame2keyid.pop(fidx, None) if frame_idx is None: # clear all interval labels if clearing all keyframes for interval_label in list(self.interval_labels.values()): self.server.scene.remove_by_name(interval_label.name) self.interval_labels.clear() self.frame2keyid.clear() def set_overlay_visibility(self, only_frame: Optional[int] = None) -> None: show_all = only_frame is None for fidx, scene_data in self.scene_elements.items(): visible = show_all or fidx == only_frame scene_data["skeleton_mesh"].set_visibility(visible) label = scene_data.get("label") if label is not None: label.visible = visible and self.labels_visible for interval_label in self.interval_labels.values(): interval_label.visible = show_all and self.labels_visible class EEJointsKeyframeSet(ConstraintSet): def __init__( self, name: str, server: viser.ViserServer, skeleton: SkeletonBase, display_name: Optional[str] = None, ): super().__init__(name, server, skeleton, display_name=display_name) # frame_idx -> list of (keyframe_id, joint_names) at this frame self.frame2keyid = dict() def create_scene_elements( self, frame_idx: int, joints_pos: torch.Tensor | np.ndarray, joints_rot: Optional[torch.Tensor | np.ndarray], joint_names: List[str], viz_label: bool = True, ): # create skeleton to visualize the full-body constraint ee_joint_indices = [] ee_gizmo_indices = [] constrained_bone_idx = [] for joint_name in joint_names: if joint_name == "Hips": continue elif joint_name in ["LeftHand", "RightHand", "LeftFoot", "RightFoot"]: expanded_joint_names = { "LeftHand": self.skeleton.left_hand_joint_names, "RightHand": self.skeleton.right_hand_joint_names, "LeftFoot": self.skeleton.left_foot_joint_names, "RightFoot": self.skeleton.right_foot_joint_names, }[joint_name] ee_joint_indices.extend([self.skeleton.bone_order_names_index[joint] for joint in expanded_joint_names]) if len(expanded_joint_names) > 1: ee_gizmo_indices.extend( [self.skeleton.bone_order_names_index[joint] for joint in expanded_joint_names[:1]] ) constrained_bone_idx.extend( [self.skeleton.bone_order_names_index[joint] - 1 for joint in expanded_joint_names[1:]] ) else: raise ValueError(f"Invalid joint name: {joint_name}") # de-duplicate while preserving order ee_joint_indices = list(dict.fromkeys(ee_joint_indices)) ee_gizmo_indices = list(dict.fromkeys(ee_gizmo_indices)) constrained_bone_idx = list(dict.fromkeys(constrained_bone_idx)) constrained_idx = [self.skeleton.root_idx] + ee_joint_indices constrained_idx = np.array(constrained_idx) constrained_bone_idx = np.array(constrained_bone_idx) # create skeleton to visualize the full-body constraint joint_color = np.full((self.skeleton.nbjoints, 3), (220, 220, 220)) bone_color = np.full((self.skeleton.nbjoints - 1, 3), (220, 220, 220)) # color constrained joints differently joint_color[constrained_idx] = (255, 0, 0) bone_color[constrained_bone_idx] = (255, 0, 0) skeleton_mesh = SkeletonMesh( 
f"/{self.name}/skeleton_{frame_idx}", self.server, self.skeleton, joint_color=joint_color, bone_color=bone_color, starting_joints_pos=to_torch(joints_pos), ) self.scene_elements[frame_idx] = { "skeleton_mesh": skeleton_mesh, } joints_pos_np = to_numpy(joints_pos) joints_rot_np = to_numpy(joints_rot) if joints_rot is not None else None if joints_rot_np is not None and len(ee_gizmo_indices) > 0: ee_axes = self.server.scene.add_batched_axes( f"/{self.name}/ee_rot_axes_{frame_idx}", batched_wxyzs=tf.SO3.from_matrix(joints_rot_np[ee_gizmo_indices]).wxyz, batched_positions=joints_pos_np[ee_gizmo_indices], axes_length=0.07, axes_radius=0.007, ) self.scene_elements[frame_idx]["ee_rotation_axes"] = ee_axes if viz_label: label = self.server.scene.add_label( name=f"/{self.name}/label_{frame_idx}", text=f"{self.display_name} @ {frame_idx}", position=joints_pos_np[self.skeleton.root_idx] + np.array([0.0, 0.05, 0.0]), font_size_mode="screen", font_screen_scale=0.7, anchor="bottom-center", ) label.visible = self.labels_visible self.scene_elements[frame_idx]["label"] = label def add_keyframe( self, keyframe_id: str, frame_idx: int, joints_pos: torch.Tensor | np.ndarray, joints_rot: torch.Tensor | np.ndarray, joint_names: List[str], end_effector_type: str, viz_label: bool = True, exists_ok: bool = False, ): """Adds a single EE keyframe at the given frame or updates the existing one at this frame. Args: keyframe_id: str, id for the keyframe. Must be unique within the given frame_idx. frame_idx: int, frame index to add the keyframe at joints_pos: torch.Tensor, [J, 3] joints positions to add the keyframe at joints_rot: torch.Tensor, [J, 3, 3] joints rotation matrices to add the keyframe at joint_names: List[str], names of the joints to add the keyframe at """ need_create_viz = True joint_names_input = joint_names if not isinstance(end_effector_type, set): end_effector_type = set([end_effector_type]) # create/update scene elements if frame_idx in self.keyframes: if joint_names != self.keyframes[frame_idx]["joint_names"]: # merge together with existing constraint if needed joint_names = set(joint_names) joint_names.update(set(self.keyframes[frame_idx]["joint_names"])) joint_names = list(joint_names) end_effector_type.update(self.keyframes[frame_idx]["end_effector_type"]) # need to re-create viz elements self.clear(frame_idx) else: need_create_viz = False # overwrite the pose with the latest one skeleton_mesh = self.scene_elements[frame_idx]["skeleton_mesh"] skeleton_mesh.set_pose(to_torch(joints_pos)) if "ee_rotation_axes" in self.scene_elements[frame_idx]: ee_gizmo_indices = [] for joint_name in joint_names: if joint_name == "Hips": continue elif joint_name in [ "LeftHand", "RightHand", "LeftFoot", "RightFoot", ]: expanded_joint_names = { "LeftHand": self.skeleton.left_hand_joint_names, "RightHand": self.skeleton.right_hand_joint_names, "LeftFoot": self.skeleton.left_foot_joint_names, "RightFoot": self.skeleton.right_foot_joint_names, }[joint_name] if len(expanded_joint_names) > 0: ee_gizmo_indices.extend( [self.skeleton.bone_order_names_index[joint] for joint in expanded_joint_names[:1]] # take only the base joint of the end effector (to avoid clutter) ) else: raise ValueError(f"Invalid joint name: {joint_name}") ee_gizmo_indices = list(dict.fromkeys(ee_gizmo_indices)) if len(ee_gizmo_indices) > 0: ee_axes = self.scene_elements[frame_idx]["ee_rotation_axes"] joints_pos_np = to_numpy(joints_pos) joints_rot_np = to_numpy(joints_rot) ee_axes.batched_positions = joints_pos_np[ee_gizmo_indices] 
ee_axes.batched_wxyzs = tf.SO3.from_matrix(joints_rot_np[ee_gizmo_indices]).wxyz if viz_label and "label" in self.scene_elements[frame_idx]: label = self.scene_elements[frame_idx]["label"] label.position = to_numpy(joints_pos)[self.skeleton.root_idx] label.visible = self.labels_visible if need_create_viz: self.create_scene_elements(frame_idx, joints_pos, joints_rot, joint_names, viz_label=viz_label) # set/update data self.keyframes[frame_idx] = { "joints_pos": to_numpy(joints_pos), "joints_rot": to_numpy(joints_rot), "joint_names": joint_names, "end_effector_type": end_effector_type, } if frame_idx not in self.frame2keyid: self.frame2keyid[frame_idx] = [] known_keyframe_ids = {k: idx for idx, (k, _) in enumerate(self.frame2keyid[frame_idx])} if keyframe_id in known_keyframe_ids.keys(): if not exists_ok: raise AssertionError("keyframe_id already exists in this frame!") idx = known_keyframe_ids[keyframe_id] # override previous exisiting keyframe self.frame2keyid[frame_idx][idx] = (keyframe_id, joint_names_input) else: # track which subset of joints are constrained by this keyframe_id self.frame2keyid[frame_idx].append((keyframe_id, joint_names_input)) def add_interval( self, interval_id: str, start_frame_idx: int, end_frame_idx: int, joints_pos: torch.Tensor | np.ndarray, joints_rot: torch.Tensor | np.ndarray, joint_names: List[str], end_effector_type: str, ): """Adds an interval of EE keyframes at the given frame or updates the existing one at this frame. Args: interval_id: str, id for the interval. Must be unique within the given start_frame_idx and end_frame_idx. start_frame_idx: int, start frame index to add the interval at end_frame_idx: int, end frame index to add the interval at joints_pos: torch.Tensor, [T, J, 3] joints positions to add the interval at joints_rot: torch.Tensor, [T, J, 3, 3] joints rotation matrices to add the interval at joint_names: List[str], names of the joints to add for the entire interval """ num_frames = end_frame_idx - start_frame_idx + 1 joints_pos_np = to_numpy(joints_pos) joints_rot_np = to_numpy(joints_rot) assert joints_pos_np.shape[0] == num_frames assert joints_rot_np.shape[0] == num_frames for frame_idx in range(start_frame_idx, end_frame_idx + 1): rel_idx = frame_idx - start_frame_idx self.add_keyframe( interval_id, frame_idx, joints_pos_np[rel_idx], joints_rot_np[rel_idx], joint_names, end_effector_type, viz_label=False, ) self._add_interval_label(start_frame_idx, end_frame_idx) def remove_keyframe(self, keyframe_id: str, frame_idx: int): """Removes a keyframe at the given frame or updates the existing one at this frame by removing the specified joints. Args: keyframe_id: str, id for the keyframe to remove. This determines which joints to remove. 
frame_idx: int, frame index to remove the keyframe at """ if frame_idx not in self.keyframes: return remaining_joint_names = set() delete_idx = None for i, (keyid, joint_names) in enumerate(self.frame2keyid[frame_idx]): if keyid == keyframe_id: delete_idx = i else: remaining_joint_names.update(joint_names) if delete_idx is None: # this keyframe_id is not in the specified frame return self.frame2keyid[frame_idx].pop(delete_idx) if len(remaining_joint_names) == 0: # no more keyframes in this frame, clear the frame del self.frame2keyid[frame_idx] self.clear(frame_idx) return # only deleting part of keyframe (potentially some subset of joints) # delete the old visualization and add a new one with the updated joint set new_joint_names = list(remaining_joint_names) self.clear(frame_idx, scene_elements_only=True) joints_pos = self.keyframes[frame_idx]["joints_pos"] joints_rot = self.keyframes[frame_idx]["joints_rot"] self.create_scene_elements(frame_idx, joints_pos, joints_rot, new_joint_names) self.keyframes[frame_idx]["joint_names"] = new_joint_names def _get_label_pos(self, frame_idx: int): return self.keyframes[frame_idx]["joints_pos"][self.skeleton.root_idx] def remove_interval(self, interval_id: str, start_frame_idx: int, end_frame_idx: int): self._remove_interval_and_update_label(interval_id, start_frame_idx, end_frame_idx) def get_constraint_info(self, device: Optional[str] = None): all_joints_pos = [] all_joints_rot = [] all_joints_names = [] all_end_effector_type = [] for v in self.keyframes.values(): joints_pos = to_torch(v["joints_pos"], device=device) joints_rot = to_torch(v["joints_rot"], device=device) if len(joints_pos.shape) == 2: all_joints_pos.append(joints_pos[None]) else: all_joints_pos.append(joints_pos) if len(joints_rot.shape) == 3: all_joints_rot.append(joints_rot[None]) else: all_joints_rot.append(joints_rot) all_joints_names.append(v["joint_names"]) all_end_effector_type.append(v["end_effector_type"]) all_joints_pos = torch.cat(all_joints_pos, dim=0) if len(all_joints_pos) > 0 else None all_joints_rot = torch.cat(all_joints_rot, dim=0) if len(all_joints_rot) > 0 else None return { "frame_idx": self.get_frame_idx(), "joints_pos": all_joints_pos, "joints_rot": all_joints_rot, "joint_names": all_joints_names, "end_effector_type": all_end_effector_type, } def clear(self, frame_idx: Optional[int] = None, scene_elements_only: bool = False): frame_idx_list = list(self.keyframes.keys()) if frame_idx is None else [frame_idx] for fidx in frame_idx_list: self.scene_elements[fidx]["skeleton_mesh"].clear() if "ee_rotation_axes" in self.scene_elements[fidx]: self.server.scene.remove_by_name(self.scene_elements[fidx]["ee_rotation_axes"].name) if "label" in self.scene_elements[fidx]: self.server.scene.remove_by_name(self.scene_elements[fidx]["label"].name) self.scene_elements.pop(fidx) if not scene_elements_only: self.keyframes.pop(fidx) if frame_idx is None: # clear all interval labels if clearing all keyframes for interval_label in list(self.interval_labels.values()): self.server.scene.remove_by_name(interval_label.name) self.interval_labels.clear() def set_overlay_visibility(self, only_frame: Optional[int] = None) -> None: show_all = only_frame is None for fidx, scene_data in self.scene_elements.items(): visible = show_all or fidx == only_frame scene_data["skeleton_mesh"].set_visibility(visible) if "ee_rotation_axes" in scene_data: scene_data["ee_rotation_axes"].visible = visible label = scene_data.get("label") if label is not None: label.visible = visible and self.labels_visible 
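
# Usage sketch (hedged): pinning both hands over a frame range with the
# EEJointsKeyframeSet above. `ee_set` is a hypothetical instance; `pos`/`rot`
# follow the add_interval docstring shapes ([T, J, 3] and [T, J, 3, 3]), and
# the "position" value for end_effector_type is illustrative only.
def _example_ee_interval(ee_set, pos, rot):
    ee_set.add_interval(
        "iv-0",
        start_frame_idx=10,
        end_frame_idx=20,
        joints_pos=pos,
        joints_rot=rot,
        joint_names=["LeftHand", "RightHand"],
        end_effector_type="position",
    )
    return ee_set.get_constraint_info(device="cpu")
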

class RootKeyframe2DSet(ConstraintSet):
    def __init__(
        self,
        name: str,
        server: viser.ViserServer,
        skeleton: SkeletonBase,
        display_name: Optional[str] = None,
    ):
        super().__init__(name, server, skeleton, display_name=display_name)
        self.dense_path = False
        self.smooth_path = True
        self.line_segments = None  # visualization of dense path
        self.interval_line_segments = {}

    def add_keyframe(
        self,
        keyframe_id: str,
        frame_idx: int,
        root_pos: torch.Tensor | np.ndarray,
        viz_label: bool = True,
        update_path: bool = True,
        viz_waypoint: bool = True,
        exists_ok: bool = False,
    ):
        """Adds a single 2D root keyframe at the given frame or updates the existing one at this frame.

        Args:
            keyframe_id: str, id for the keyframe. Must be unique within the given frame_idx.
            frame_idx: int, frame index to add the keyframe at
            root_pos: torch.Tensor, [3] root position to add the keyframe at, y entry (index 1) should be 0
            viz_label: bool, whether to visualize the label for the keyframe
        """
        root_pos_np = to_numpy(root_pos)
        if frame_idx not in self.scene_elements:
            self.scene_elements[frame_idx] = {}
        scene_data = self.scene_elements[frame_idx]
        if frame_idx in self.keyframes:
            waypoint = scene_data.get("waypoint")
            if waypoint is not None:
                waypoint.update_position(root_pos_np)
            elif viz_waypoint:
                waypoint = WaypointMesh(
                    f"/{self.name}/waypoint_{frame_idx}",
                    self.server,
                    position=root_pos_np,
                )
                scene_data["waypoint"] = waypoint
            label = scene_data.get("label")
            if viz_label and label is not None:
                label.position = root_pos_np
                label.visible = self.labels_visible
            elif viz_label and label is None:
                label = self.server.scene.add_label(
                    name=f"/{self.name}/label_{frame_idx}",
                    text=f"{self.display_name} @ {frame_idx}",
                    position=root_pos_np,
                    font_size_mode="screen",
                    font_screen_scale=0.7,
                    anchor="bottom-left",
                )
                label.visible = self.labels_visible
                scene_data["label"] = label
        else:
            if viz_waypoint:
                waypoint = WaypointMesh(
                    f"/{self.name}/waypoint_{frame_idx}",
                    self.server,
                    position=root_pos_np,
                )
                scene_data["waypoint"] = waypoint
            if viz_label:
                label = self.server.scene.add_label(
                    name=f"/{self.name}/label_{frame_idx}",
                    text=f"{self.display_name} @ {frame_idx}",
                    position=root_pos_np,
                    font_size_mode="screen",
                    font_screen_scale=0.7,
                    anchor="bottom-left",
                )
                label.visible = self.labels_visible
                scene_data["label"] = label

        # set/update data
        self.keyframes[frame_idx] = root_pos_np
        if frame_idx not in self.frame2keyid:
            self.frame2keyid[frame_idx] = []
        if keyframe_id in self.frame2keyid[frame_idx]:
            if not exists_ok:
                raise AssertionError("keyframe_id already exists in this frame!")
        else:
            self.frame2keyid[frame_idx].append(keyframe_id)
        # need to update path visualization
        if self.line_segments is not None and update_path:
            self.update_line_segments()

    def add_interval(
        self,
        interval_id: str,
        start_frame_idx: int,
        end_frame_idx: int,
        root_pos: torch.Tensor | np.ndarray,
    ):
        """Adds an interval of 2D root keyframes between the given start and end frames.

        Args:
            interval_id: str, id for the interval. Must be unique within the given start_frame_idx and end_frame_idx.
            start_frame_idx: int, start frame index to add the interval at
            end_frame_idx: int, end frame index to add the interval at
            root_pos: torch.Tensor, [T, 3] root positions to add the interval at
        """
        root_pos_np = to_numpy(root_pos)
        assert root_pos_np.shape[0] == end_frame_idx - start_frame_idx + 1
        if root_pos_np.shape[0] >= 2:
            points = np.zeros((root_pos_np.shape[0] - 1, 2, 3))
            points[:, 0] = root_pos_np[:-1]
            points[:, 1] = root_pos_np[1:]
            if interval_id in self.interval_line_segments:
                self.server.scene.remove_by_name(self.interval_line_segments[interval_id].name)
            self.interval_line_segments[interval_id] = self.server.scene.add_line_segments(
                name=f"/{self.name}/interval_{interval_id}_line",
                points=points,
                colors=(255, 0, 0),
                line_width=5.0,
            )
        for frame_idx in range(start_frame_idx, end_frame_idx + 1):
            rel_idx = frame_idx - start_frame_idx
            self.add_keyframe(
                interval_id,
                frame_idx,
                root_pos_np[rel_idx],
                viz_label=False,
                update_path=False,
                viz_waypoint=False,
            )
        self._add_interval_label(start_frame_idx, end_frame_idx)
        if self.line_segments is not None:
            self.update_line_segments()

    def set_smooth_path(self, smooth_path: bool):
        self.smooth_path = smooth_path
        if self.line_segments is not None:
            self.update_line_segments()

    def set_dense_path(self, dense_path: bool):
        """If dense_path is True, will make the path dense by interpolating between added keyframes.

        Args:
            dense_path: bool, whether to make the path dense
        """
        self.dense_path = dense_path
        if self.dense_path:
            # visualize dense path with line segments
            self.line_segments = self.server.scene.add_line_segments(
                name=f"/{self.name}/line_segments",
                points=np.zeros((1, 2, 3)),
                colors=(255, 0, 0),
                line_width=5.0,
            )
            self.update_line_segments()
        else:
            if self.line_segments is not None:
                self.server.scene.remove_by_name(self.line_segments.name)
                self.line_segments = None

    def interpolate_path(self, t: np.ndarray):
        """Interpolates the path between the given frame indices.

        Args:
            t: np.ndarray, frame indices to interpolate at
        """
        from scipy.interpolate import interp1d

        cur_info = self._get_sparse_constraint_info()
        frame_idx = cur_info["frame_idx"]
        all_root_pos = cur_info["root_pos"]
        x = all_root_pos[:, 0]
        z = all_root_pos[:, 2]
        kind = "linear"
        # if self.smooth_path and len(frame_idx) >= 3:
        #     kind = "quadratic"
        interp_x = interp1d(frame_idx, x, kind=kind)
        interp_z = interp1d(frame_idx, z, kind=kind)
        x_new = interp_x(t)
        z_new = interp_z(t)
        path3d = np.stack([x_new, np.zeros_like(x_new), z_new], axis=1)
        if self.smooth_path and len(frame_idx) >= 3:
            path3d = get_smooth_root_pos(torch.from_numpy(path3d[None]))[0].numpy()
        return path3d

    def update_line_segments(self):
        if len(self.keyframes) < 2:
            return
        t = np.array(sorted(self.get_frame_idx()))
        if self.smooth_path:
            # more points for smoothed curve
            t = np.linspace(t[0], t[-1], 100)
        path3d = self.interpolate_path(t)
        points = np.zeros((len(path3d) - 1, 2, 3))
        points[:, 0] = path3d[:-1]
        points[:, 1] = path3d[1:]
        self.line_segments.points = points

    def remove_keyframe(self, keyframe_id: str, frame_idx: int):
        if frame_idx not in self.keyframes:
            return
        if keyframe_id not in self.frame2keyid[frame_idx]:
            return
        self.frame2keyid[frame_idx].remove(keyframe_id)
        if len(self.frame2keyid[frame_idx]) == 0:
            del self.frame2keyid[frame_idx]
            self.clear(frame_idx)
        if self.line_segments is not None:
            self.update_line_segments()

    def _get_label_pos(self, frame_idx: int):
        return self.keyframes[frame_idx]

    def remove_interval(self, interval_id: str, start_frame_idx: int, end_frame_idx: int):
        if interval_id in self.interval_line_segments:
            self.server.scene.remove_by_name(self.interval_line_segments[interval_id].name)
            del self.interval_line_segments[interval_id]
        self._remove_interval_and_update_label(interval_id, start_frame_idx, end_frame_idx)

    def _get_sparse_constraint_info(self):
        all_root_pos = []
        for v in self.keyframes.values():
            v_np = to_numpy(v)
            if len(v_np.shape) == 1:
                all_root_pos.append(v_np[None])
            else:
                all_root_pos.append(v_np)
        if len(all_root_pos) > 0:
            all_root_pos = np.concatenate(all_root_pos, axis=0)
        else:
            all_root_pos = None
        return {
            "frame_idx": self.get_frame_idx(),
            "root_pos": all_root_pos,
        }

    def get_constraint_info(self, device: Optional[str] = None):
        if not self.dense_path or len(self.keyframes) == 0:
            info = self._get_sparse_constraint_info()
            return {
                "frame_idx": info["frame_idx"],
                "root_pos": to_torch(info["root_pos"], device=device, dtype=torch.float32),
            }
        else:
            frame_idx_list = self.get_frame_idx()
            min_frame_idx = min(frame_idx_list)
            max_frame_idx = max(frame_idx_list)
            t = np.arange(min_frame_idx, max_frame_idx + 1)
            path3d = self.interpolate_path(t)
            return {
                "frame_idx": t.tolist(),
                "root_pos": to_torch(path3d, device=device, dtype=torch.float32),
            }

    def clear(self, frame_idx: Optional[int] = None):
        frame_idx_list = list(self.keyframes.keys()) if frame_idx is None else [frame_idx]
        for fidx in frame_idx_list:
            scene_data = self.scene_elements.get(fidx, {})
            waypoint = scene_data.get("waypoint")
            if waypoint is not None:
                waypoint.clear()
            label = scene_data.get("label")
            if label is not None:
                self.server.scene.remove_by_name(label.name)
            self.keyframes.pop(fidx)
            self.scene_elements.pop(fidx)
        if frame_idx is None:
            # clear all interval labels if clearing all keyframes
            for interval_label in list(self.interval_labels.values()):
                self.server.scene.remove_by_name(interval_label.name)
            self.interval_labels.clear()
            # clear line segments if turning off dense path
            if self.line_segments is not None:
                self.server.scene.remove_by_name(self.line_segments.name)
                self.line_segments = None
            for interval_line in list(self.interval_line_segments.values()):
                self.server.scene.remove_by_name(interval_line.name)
            self.interval_line_segments.clear()

    def set_overlay_visibility(self, only_frame: Optional[int] = None) -> None:
        show_all = only_frame is None
        for fidx, scene_data in self.scene_elements.items():
            visible = show_all or fidx == only_frame
            waypoint = scene_data.get("waypoint")
            if waypoint is not None:
                waypoint.set_visible(visible)
            label = scene_data.get("label")
            if label is not None:
                label.visible = visible and self.labels_visible
        if self.line_segments is not None:
            self.line_segments.visible = show_all
        for line_handle in self.interval_line_segments.values():
            line_handle.visible = show_all
        for interval_label in self.interval_labels.values():
            interval_label.visible = show_all and self.labels_visible
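
# Worked sketch (hedged): the dense-path interpolation used above, reduced to
# plain numpy/scipy. Frame indices and xz waypoints here are made up, and the
# repo-internal get_smooth_root_pos smoothing step is deliberately omitted.
def _example_dense_root_path():
    import numpy as np
    from scipy.interpolate import interp1d

    frame_idx = np.array([0, 30, 60])  # keyframed frames
    xz = np.array([[0.0, 0.0], [1.0, 0.5], [2.0, 0.0]])  # root xz waypoints
    t = np.arange(frame_idx[0], frame_idx[-1] + 1)
    x_new = interp1d(frame_idx, xz[:, 0], kind="linear")(t)
    z_new = interp1d(frame_idx, xz[:, 1], kind="linear")(t)
    # y (height) stays zero for a projected 2D root path
    return np.stack([x_new, np.zeros_like(x_new), z_new], axis=1)  # [T, 3]
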

#
# GUI Elements that need to be tracked


================================================
FILE: kimodo/viz/coords.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Pure numpy coordinate/rotation helpers for viz."""

import numpy as np


def skew(v: np.ndarray) -> np.ndarray:
    """Skew-symmetric matrix for cross products: skew(v) @ x == np.cross(v, x)."""
    vx, vy, vz = float(v[0]), float(v[1]), float(v[2])
    return np.array([[0.0, -vz, vy], [vz, 0.0, -vx], [-vy, vx, 0.0]], dtype=np.float64)


def rotation_matrix_from_two_vec(v_from: np.ndarray, v_to: np.ndarray, eps: float = 1e-8) -> np.ndarray:
    """Return R such that R @ v_from ~= v_to (both treated as 3D vectors).

    Uses a Rodrigues-style construction, with special handling for near-parallel
    and near-opposite vectors for numerical stability.
    """
    a = np.asarray(v_from, dtype=np.float64).reshape(3)
    b = np.asarray(v_to, dtype=np.float64).reshape(3)
    na = np.linalg.norm(a)
    nb = np.linalg.norm(b)
    if na < eps or nb < eps:
        return np.eye(3, dtype=np.float64)
    a = a / na
    b = b / nb
    c = float(np.clip(np.dot(a, b), -1.0, 1.0))  # cos(theta)
    if c > 1.0 - eps:
        return np.eye(3, dtype=np.float64)
    if c < -1.0 + eps:
        # 180 deg rotation about any axis orthogonal to a:
        # R = -I + 2 * uu^T, where u is a unit axis orthogonal to a.
        axis_seed = np.array([1.0, 0.0, 0.0], dtype=np.float64)
        if abs(float(np.dot(a, axis_seed))) > 0.9:
            axis_seed = np.array([0.0, 1.0, 0.0], dtype=np.float64)
        u = np.cross(a, axis_seed)
        u = u / np.linalg.norm(u).clip(min=eps)
        return -np.eye(3, dtype=np.float64) + 2.0 * np.outer(u, u)
    v = np.cross(a, b)
    s2 = float(np.dot(v, v))  # ||v||^2 == sin^2(theta)
    K = skew(v)
    # R = I + K + K^2 * ((1 - c) / s^2)
    return np.eye(3, dtype=np.float64) + K + (K @ K) * ((1.0 - c) / s2)
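
# Quick numeric check (hedged sketch) for rotation_matrix_from_two_vec above:
# the generic Rodrigues branch, the near-parallel branch, and the 180-degree
# branch should all map v_from onto the direction of v_to.
if __name__ == "__main__":
    for v_from, v_to in [
        ([1.0, 0.0, 0.0], [0.0, 1.0, 0.0]),    # generic 90-degree case
        ([1.0, 0.0, 0.0], [1.0, 1e-12, 0.0]),  # near-parallel -> identity
        ([1.0, 0.0, 0.0], [-1.0, 0.0, 0.0]),   # near-opposite -> 180 degrees
    ]:
        R = rotation_matrix_from_two_vec(np.array(v_from), np.array(v_to))
        b_unit = np.array(v_to) / np.linalg.norm(v_to)
        assert np.allclose(R @ np.array(v_from), b_unit, atol=1e-6)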


================================================
FILE: kimodo/viz/g1_rig.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""G1 robot rig: mesh loading, joint mapping, and viser scene setup for G1 skeleton."""

import os
import xml.etree.ElementTree as ET
from typing import Any, Optional, Tuple

import numpy as np
import trimesh
import viser
import viser.transforms as tf

from kimodo.assets import skeleton_asset_path
from kimodo.skeleton import G1Skeleton34

# MuJoCo (z-up, x-forward) -> kimodo (y-up, z-forward)
MUJOCO_TO_KIMODO = np.array([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0]], dtype=np.float64)

G1_MESH_JOINT_MAP = {
    "pelvis_skel": ["pelvis.STL", "pelvis_contour_link.STL"],
    "left_hip_pitch_skel": ["left_hip_pitch_link.STL"],
    "left_hip_roll_skel": ["left_hip_roll_link.STL"],
    "left_hip_yaw_skel": ["left_hip_yaw_link.STL"],
    "left_knee_skel": ["left_knee_link.STL"],
    "left_ankle_pitch_skel": ["left_ankle_pitch_link.STL"],
    "left_ankle_roll_skel": ["left_ankle_roll_link.STL"],
    "right_hip_pitch_skel": ["right_hip_pitch_link.STL"],
    "right_hip_roll_skel": ["right_hip_roll_link.STL"],
    "right_hip_yaw_skel": ["right_hip_yaw_link.STL"],
    "right_knee_skel": ["right_knee_link.STL"],
    "right_ankle_pitch_skel": ["right_ankle_pitch_link.STL"],
    "right_ankle_roll_skel": ["right_ankle_roll_link.STL"],
    "waist_yaw_skel": ["waist_yaw_link_rev_1_0.STL", "waist_yaw_link.STL"],
    "waist_roll_skel": ["waist_roll_link_rev_1_0.STL", "waist_roll_link.STL"],
    "waist_pitch_skel": [
        "torso_link_rev_1_0.STL",
        "torso_link.STL",
        "logo_link.STL",
        "head_link.STL",
    ],
    "left_shoulder_pitch_skel": ["left_shoulder_pitch_link.STL"],
    "left_shoulder_roll_skel": ["left_shoulder_roll_link.STL"],
    "left_shoulder_yaw_skel": ["left_shoulder_yaw_link.STL"],
    "left_elbow_skel": ["left_elbow_link.STL"],
    "left_wrist_roll_skel": ["left_wrist_roll_link.STL"],
    "left_wrist_pitch_skel": ["left_wrist_pitch_link.STL"],
    "left_wrist_yaw_skel": ["left_wrist_yaw_link.STL", "left_rubber_hand.STL"],
    "right_shoulder_pitch_skel": ["right_shoulder_pitch_link.STL"],
    "right_shoulder_roll_skel": ["right_shoulder_roll_link.STL"],
    "right_shoulder_yaw_skel": ["right_shoulder_yaw_link.STL"],
    "right_elbow_skel": ["right_elbow_link.STL"],
    "right_wrist_roll_skel": ["right_wrist_roll_link.STL"],
    "right_wrist_pitch_skel": ["right_wrist_pitch_link.STL"],
    "right_wrist_yaw_skel": ["right_wrist_yaw_link.STL", "right_rubber_hand.STL"],
}

# Joint axis/limits from g1.xml (used by exports, e.g. MujocoQposConverter)
_G1_JOINT_AXIS_INDEX_CACHE: Optional[dict[str, int]] = None
_G1_JOINT_LIMITS_CACHE: Optional[dict[str, tuple[float, float]]] = None


def _get_g1_joint_axis_indices() -> dict[str, int]:
    """Return a map from G1 joint names to a single rotation axis index."""
    global _G1_JOINT_AXIS_INDEX_CACHE
    if _G1_JOINT_AXIS_INDEX_CACHE is not None:
        return _G1_JOINT_AXIS_INDEX_CACHE
    xml_path = str(skeleton_asset_path("g1skel34", "xml", "g1.xml"))
    if not os.path.exists(xml_path):
        _G1_JOINT_AXIS_INDEX_CACHE = {}
        return _G1_JOINT_AXIS_INDEX_CACHE
    tree = ET.parse(xml_path)
    root = tree.getroot()
    joint_axes = {}
    for xml_class in tree.findall(".//default"):
        if "class" not in xml_class.attrib:
            continue
        joint_nodes = xml_class.findall("joint")
        if joint_nodes:
            joint_axes[xml_class.get("class")] = joint_nodes[0].get("axis")
    axis_indices_by_name: dict[str, int] = {}
    for joint in root.find("worldbody").findall(".//joint"):
        axis_str = joint.get("axis") or joint_axes.get(joint.get("class"))
        if axis_str is None:
            continue
        axis_vals = np.array([float(x) for x in axis_str.split()], dtype=np.float64)
        if not np.any(axis_vals):
            continue
        axis_kimodo = MUJOCO_TO_KIMODO @ axis_vals
        axis_idx = int(np.argmax(np.abs(axis_kimodo)))
        axis_indices_by_name[joint.get("name").replace("_joint", "_skel")] = axis_idx
    _G1_JOINT_AXIS_INDEX_CACHE = axis_indices_by_name
    return _G1_JOINT_AXIS_INDEX_CACHE


def _get_g1_joint_limits() -> dict[str, tuple[float, float]]:
    """Return a map from G1 joint names to (min, max) angle limits in radians."""
    global _G1_JOINT_LIMITS_CACHE
    if _G1_JOINT_LIMITS_CACHE is not None:
        return _G1_JOINT_LIMITS_CACHE
    xml_path = str(skeleton_asset_path("g1skel34", "xml", "g1.xml"))
    if not os.path.exists(xml_path):
        _G1_JOINT_LIMITS_CACHE = {}
        return _G1_JOINT_LIMITS_CACHE
    tree = ET.parse(xml_path)
    root = tree.getroot()
    class_ranges: dict[str, tuple[float, float]] = {}
    for xml_class in tree.findall(".//default"):
        class_name = xml_class.get("class")
        if not class_name:
            continue
        joint_nodes = xml_class.findall("joint")
        if not joint_nodes:
            continue
        range_str = joint_nodes[0].get("range")
        if not range_str:
            continue
        range_vals = [float(x) for x in range_str.split()]
        if len(range_vals) != 2:
            continue
        class_ranges[class_name] = (range_vals[0], range_vals[1])
    joint_limits: dict[str, tuple[float, float]] = {}
    worldbody = root.find("worldbody")
    if worldbody is None:
        _G1_JOINT_LIMITS_CACHE = {}
        return _G1_JOINT_LIMITS_CACHE
    for joint in worldbody.findall(".//joint"):
        range_str = joint.get("range") or class_ranges.get(joint.get("class"))
        if range_str is None:
            continue
        if isinstance(range_str, tuple):
            joint_range = range_str
        else:
            range_vals = [float(x) for x in range_str.split()]
            if len(range_vals) != 2:
                continue
            joint_range = (range_vals[0], range_vals[1])
        joint_name = joint.get("name")
        if not joint_name:
            continue
        joint_limits[joint_name.replace("_joint", "_skel")] = joint_range
    _G1_JOINT_LIMITS_CACHE = joint_limits
    return _G1_JOINT_LIMITS_CACHE


_G1_JOINT_F2Q_DATA_CACHE: Optional[dict[str, dict[str, Any]]] = None


def get_g1_joint_f2q_data(
    skeleton: G1Skeleton34,
) -> dict[str, dict[str, Any]]:
    """Return per-hinge-joint f2q data for correct 1-DoF + limits in offset space.

    Each entry is for a G1 hinge joint (by name) and contains:
    - "offset_f2q": (3, 3) matrix such that R_f2q = offset_f2q @ R_local (kimodo).
    - "axis_f2q": (3,) unit axis in f2q space; angle = dot(axis_angle(R_f2q), axis_f2q).
    - "rest_dof_axis_angle": angle (rad) at T-pose in f2q space; MuJoCo q = angle_f2q - this.

    Limits from the XML apply to q = angle_f2q - rest_dof_axis_angle.
    """
    global _G1_JOINT_F2Q_DATA_CACHE
    if _G1_JOINT_F2Q_DATA_CACHE is not None:
        return _G1_JOINT_F2Q_DATA_CACHE
    from kimodo.exports.mujoco import MujocoQposConverter

    converter = MujocoQposConverter(skeleton)
    # converter: _rot_offsets_f2q[kimodo_idx], _mujoco_joint_axis_values_f2q_space[hinge_idx],
    # _rest_dofs_axis_angle[hinge_idx], _kimodo_indices_to_mujoco_indices[kimodo_idx] = hinge_idx+1 or 0
    out: dict[str, dict[str, Any]] = {}
    for j in range(skeleton.nbjoints):
        mujoco_one_based = converter._kimodo_indices_to_mujoco_indices[j].item()
        if mujoco_one_based <= 0:
            continue
        hinge_idx = mujoco_one_based - 1
        joint_name = skeleton.bone_order_names[j]
        offset_f2q = converter._rot_offsets_f2q[j].detach().cpu().numpy().astype(np.float64)
        axis_f2q = converter._mujoco_joint_axis_values_f2q_space[hinge_idx].detach().cpu().numpy().astype(np.float64)
        n = np.linalg.norm(axis_f2q)
        if n > 1e-10:
            axis_f2q = axis_f2q / n
        rest_dof = float(converter._rest_dofs_axis_angle[hinge_idx].detach().cpu().numpy())
        out[joint_name] = {
            "offset_f2q": offset_f2q,
            "axis_f2q": axis_f2q,
            "rest_dof_axis_angle": rest_dof,
        }
    _G1_JOINT_F2Q_DATA_CACHE = out
    return out
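
# Worked sketch (hedged) of the f2q mapping documented above: recover the
# MuJoCo hinge angle q for one joint from its kimodo local rotation matrix.
# `skeleton`, `joint_name`, and `R_local` are assumed inputs; limits come from
# _get_g1_joint_limits() and default to unbounded when the joint has none.
def _example_hinge_q(skeleton: G1Skeleton34, joint_name: str, R_local: np.ndarray) -> float:
    f2q = get_g1_joint_f2q_data(skeleton)[joint_name]
    R_f2q = f2q["offset_f2q"] @ R_local.astype(np.float64)
    # project the axis-angle vector onto the hinge axis in f2q space
    angle_f2q = float(np.dot(tf.SO3.from_matrix(R_f2q).log(), f2q["axis_f2q"]))
    q = angle_f2q - f2q["rest_dof_axis_angle"]
    lo, hi = _get_g1_joint_limits().get(joint_name, (-np.inf, np.inf))
    return float(np.clip(q, lo, hi))
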

# -----------------------------------------------------------------------------
# Mesh loading cache (shared across G1 rig instances; each rig gets its own scene meshes)
# -----------------------------------------------------------------------------
_G1_MESH_DATA_CACHE: dict[str, list[dict]] = {}


def _load_g1_mesh_data(
    mesh_dir: str,
    skeleton: G1Skeleton34,
) -> list[dict]:
    """Load STL meshes and XML transforms once per mesh_dir; shared across rig instances."""
    if mesh_dir in _G1_MESH_DATA_CACHE:
        return _G1_MESH_DATA_CACHE[mesh_dir]
    mesh_geom_cache = G1MeshRig._mesh_geom_cache
    mesh_transform_cache = G1MeshRig._mesh_transform_cache
    # Load XML-derived transforms (cached inside _get_mesh_local_transforms_impl)
    mesh_file_transforms = _get_mesh_local_transforms_impl(mesh_dir, mesh_transform_cache)
    data_list: list[dict] = []
    for joint_name, mesh_files in G1_MESH_JOINT_MAP.items():
        if joint_name not in skeleton.bone_index:
            continue
        joint_idx = skeleton.bone_index[joint_name]
        for mesh_file in mesh_files:
            mesh_path = os.path.join(mesh_dir, mesh_file)
            if not os.path.exists(mesh_path):
                continue
            vertices, faces = _get_mesh_geometry_impl(mesh_file, mesh_path, mesh_dir, mesh_geom_cache)
            if vertices is None:
                continue
            geom_pos, geom_rot = mesh_file_transforms.get(
                mesh_file,
                (np.zeros(3, dtype=np.float64), np.eye(3, dtype=np.float64)),
            )
            data_list.append(
                {
                    "mesh_file": mesh_file,
                    "vertices": vertices,
                    "faces": faces,
                    "joint_idx": joint_idx,
                    "geom_pos": geom_pos.copy(),
                    "geom_rot": geom_rot.copy(),
                }
            )
    _G1_MESH_DATA_CACHE[mesh_dir] = data_list
    return data_list


def _get_mesh_geometry_impl(
    mesh_file: str,
    mesh_path: str,
    mesh_dir: str,
    mesh_geom_cache: dict,
) -> tuple[Optional[np.ndarray], Optional[np.ndarray]]:
    """Load one STL; result cached per mesh_dir and shared across rigs."""
    cached = mesh_geom_cache.get(mesh_dir)
    if cached is not None and mesh_file in cached:
        vertices, faces = cached[mesh_file]
        return vertices.copy(), faces.copy()
    mesh = trimesh.load_mesh(mesh_path, process=True)
    if isinstance(mesh, trimesh.Scene):
        mesh = trimesh.util.concatenate(mesh.dump())
    vertices = mesh.vertices @ MUJOCO_TO_KIMODO.T
    faces = mesh.faces
    if mesh_dir not in mesh_geom_cache:
        mesh_geom_cache[mesh_dir] = {}
    mesh_geom_cache[mesh_dir][mesh_file] = (vertices, faces)
    return vertices.copy(), faces.copy()


def _get_mesh_local_transforms_impl(
    mesh_dir: str,
    mesh_transform_cache: dict,
) -> dict[str, tuple[np.ndarray, np.ndarray]]:
    """Parse g1.xml once per mesh_dir; result shared across G1 rig instances."""
    cached = mesh_transform_cache.get(mesh_dir)
    if cached is not None:
        return {mesh_file: (pos.copy(), rot.copy()) for mesh_file, (pos, rot) in cached.items()}
    xml_path = os.path.abspath(os.path.join(mesh_dir, "..", "..", "xml", "g1.xml"))
    if not os.path.exists(xml_path):
        return {}
    tree = ET.parse(xml_path)
    root = tree.getroot()
    mesh_file_to_mesh_name = {}
    for mesh in root.findall(".//asset/mesh"):
        mesh_name = mesh.get("name")
        mesh_file = mesh.get("file")
        if mesh_name and mesh_file:
            mesh_file_to_mesh_name[mesh_file] = mesh_name
    mesh_name_to_transform = {}
    for geom in root.findall(".//geom"):
        mesh_name = geom.get("mesh")
        if mesh_name is None:
            continue
        pos = geom.get("pos")
        quat = geom.get("quat")
        if pos is None:
            geom_pos = np.zeros(3, dtype=np.float64)
        else:
            geom_pos = np.array([float(x) for x in pos.split()], dtype=np.float64)
        if quat is None:
            geom_rot = np.eye(3, dtype=np.float64)
        else:
            wxyz = np.array([float(x) for x in quat.split()], dtype=np.float64)
            geom_rot = tf.SO3(wxyz=wxyz).as_matrix()
        mesh_name_to_transform[mesh_name] = (geom_pos, geom_rot)
    mesh_file_transforms = {}
    for mesh_file, mesh_name in mesh_file_to_mesh_name.items():
        geom_pos, geom_rot = mesh_name_to_transform.get(
            mesh_name,
            (np.zeros(3, dtype=np.float64), np.eye(3, dtype=np.float64)),
        )
        geom_pos = MUJOCO_TO_KIMODO @ geom_pos
        geom_rot = MUJOCO_TO_KIMODO @ geom_rot @ MUJOCO_TO_KIMODO.T
        mesh_file_transforms[mesh_file] = (geom_pos, geom_rot)
    mesh_transform_cache[mesh_dir] = {mf: (pos.copy(), rot.copy()) for mf, (pos, rot) in mesh_file_transforms.items()}
    return mesh_file_transforms


class G1MeshRig:
    """Rig for G1 STL meshes.

    Each instance has its own scene meshes (so clear() only removes one character).
    Loading is shared: STL files and g1.xml are cached per mesh_dir via
    _load_g1_mesh_data() and the class-level _mesh_*_cache dicts.
""" _mesh_geom_cache: dict[str, dict[str, tuple[np.ndarray, np.ndarray]]] = {} _mesh_transform_cache: dict[str, dict[str, tuple[np.ndarray, np.ndarray]]] = {} def __init__( self, name: str, server: viser.ViserServer | viser.ClientHandle, skeleton: G1Skeleton34, mesh_dir: str, color: Tuple[int, int, int], ): self.server = server self.skeleton = skeleton self.mesh_dir = mesh_dir self.color = color self.mesh_handles: list[viser.SceneHandle] = [] self.mesh_items: list[dict[str, object]] = [] self._defer_initial_visibility = True data_list = _load_g1_mesh_data(mesh_dir, skeleton) for item in data_list: mesh_file = item["mesh_file"] vertices = item["vertices"] faces = item["faces"] joint_idx = item["joint_idx"] geom_pos = item["geom_pos"] geom_rot = item["geom_rot"] handle = self.server.scene.add_mesh_simple( f"/{name}/g1_mesh/{os.path.splitext(mesh_file)[0]}", vertices=vertices, faces=faces, opacity=None, color=self.color, wireframe=False, visible=not self._defer_initial_visibility, ) self.mesh_handles.append(handle) self.mesh_items.append( { "handle": handle, "joint_idx": joint_idx, "geom_pos": geom_pos, "geom_rot": geom_rot, } ) if self._defer_initial_visibility: for handle in self.mesh_handles: handle.visible = True def set_visibility(self, visible: bool) -> None: for handle in self.mesh_handles: handle.visible = visible def set_opacity(self, opacity: float) -> None: for handle in self.mesh_handles: handle.opacity = opacity def set_wireframe(self, wireframe: bool) -> None: for handle in self.mesh_handles: handle.wireframe = wireframe def set_color(self, color: Tuple[int, int, int]) -> None: self.color = color for handle in self.mesh_handles: handle.color = color def set_pose(self, joints_pos: np.ndarray, joints_rot: np.ndarray) -> None: for item in self.mesh_items: handle = item["handle"] joint_idx = item["joint_idx"] geom_pos = item["geom_pos"] geom_rot = item["geom_rot"] joint_pos = joints_pos[joint_idx] joint_rot = joints_rot[joint_idx] mesh_pos = joint_pos + joint_rot @ geom_pos mesh_rot = joint_rot @ geom_rot handle.position = mesh_pos handle.wxyz = tf.SO3.from_matrix(mesh_rot).wxyz def clear(self) -> None: for handle in self.mesh_handles: self.server.scene.remove_by_name(handle.name) self.mesh_handles = [] self.mesh_items = [] ================================================ FILE: kimodo/viz/gui.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """GUI element handles for the demo app.""" from dataclasses import dataclass import viser @dataclass class GuiElements: gui_play_pause_button: viser.GuiInputHandle gui_next_frame_button: viser.GuiInputHandle gui_prev_frame_button: viser.GuiInputHandle gui_generate_button: viser.GuiInputHandle gui_model_fps: viser.GuiInputHandle[int] gui_timeline: viser.GuiInputHandle[int] gui_viz_skeleton_checkbox: viser.GuiInputHandle[bool] gui_viz_foot_contacts_checkbox: viser.GuiInputHandle[bool] gui_viz_skinned_mesh_checkbox: viser.GuiInputHandle[bool] gui_viz_skinned_mesh_opacity_slider: viser.GuiInputHandle[float] gui_camera_fov_slider: viser.GuiInputHandle[float] # generation controls gui_duration_slider: viser.GuiInputHandle[float] gui_num_samples_slider: viser.GuiInputHandle[int] gui_cfg_checkbox: viser.GuiCheckboxHandle gui_cfg_text_weight_slider: viser.GuiInputHandle[float] gui_cfg_constraint_weight_slider: viser.GuiInputHandle[float] gui_diffusion_steps_slider: viser.GuiInputHandle[int] gui_seed: viser.GuiInputHandle[int] gui_postprocess_checkbox: viser.GuiCheckboxHandle gui_root_margin: viser.GuiInputHandle[float] gui_real_robot_rotations_checkbox: viser.GuiInputHandle[bool] # appearance gui_dark_mode_checkbox: viser.GuiCheckboxHandle # which skinning method to use for SOMA gui_use_soma_layer_checkbox: viser.GuiCheckboxHandle ================================================ FILE: kimodo/viz/playback.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Playback and motion editing: CharacterMotion.""" from typing import Callable, Literal, Optional import numpy as np import torch import viser.transforms as tf from kimodo.skeleton import ( G1Skeleton34, SOMASkeleton30, SOMASkeleton77, batch_rigid_transform, global_rots_to_local_rots, ) from kimodo.tools import to_numpy, to_torch from .g1_rig import ( _get_g1_joint_axis_indices, _get_g1_joint_limits, get_g1_joint_f2q_data, ) from .scene import Character class CharacterMotion: def __init__( self, character: Character, joints_pos: torch.Tensor, joints_rot: torch.Tensor, foot_contacts: Optional[torch.Tensor] = None, ): self.character = character self.server = character.server self.skeleton = character.skeleton self.name = character.name # [T, J, 3] global joint positions self.joints_pos = joints_pos # [T, J, 3, 3] global joint rotation matrices self.joints_rot = joints_rot assert joints_pos.shape[0] == joints_rot.shape[0] # keep track of local rots as well for convenience during pose editing self.joints_local_rot = global_rots_to_local_rots(joints_rot, self.skeleton) self.length = joints_pos.shape[0] self.cur_frame_idx = None self.foot_contacts = foot_contacts if foot_contacts is not None: assert foot_contacts.shape[0] == self.length self.precompute_mesh_info() # gizmos for pose editing self.root_translation_gizmo = None self.updating_root_translation_gizmo = False self.joint_gizmos = None self.updating_joint_gizmos = False self.gizmo_space: Literal["world", "local"] = "local" self._drag_start_world_rot: list = [] self._joint_gizmo_dragging: list[bool] = [] def precompute_mesh_info(self): if self.character.skeleton_mesh is not None: print("Caching skeleton mesh info...") self.character.skeleton_mesh.precompute_mesh_info(self.joints_pos) if self.character.skinned_mesh is not None: print("Caching skinning info...") self.character.precompute_skinning(self.joints_pos, 
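
# Construction sketch (hedged): GuiElements only stores handles; the demo app
# creates them elsewhere with the standard viser GUI API, roughly as below.
# Labels and defaults here are illustrative, not the app's actual values, and
# only a few of the fields are shown.
def _example_gui_handles(server: viser.ViserServer):
    play = server.gui.add_button("Play / Pause")
    timeline = server.gui.add_slider("Frame", min=0, max=100, step=1, initial_value=0)
    skeleton_checkbox = server.gui.add_checkbox("Show skeleton", initial_value=True)
    return play, timeline, skeleton_checkbox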


================================================
FILE: kimodo/viz/playback.py
================================================
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Playback and motion editing: CharacterMotion."""

from typing import Callable, Literal, Optional

import numpy as np
import torch
import viser.transforms as tf

from kimodo.skeleton import (
    G1Skeleton34,
    SOMASkeleton30,
    SOMASkeleton77,
    batch_rigid_transform,
    global_rots_to_local_rots,
)
from kimodo.tools import to_numpy, to_torch

from .g1_rig import (
    _get_g1_joint_axis_indices,
    _get_g1_joint_limits,
    get_g1_joint_f2q_data,
)
from .scene import Character


class CharacterMotion:
    def __init__(
        self,
        character: Character,
        joints_pos: torch.Tensor,
        joints_rot: torch.Tensor,
        foot_contacts: Optional[torch.Tensor] = None,
    ):
        self.character = character
        self.server = character.server
        self.skeleton = character.skeleton
        self.name = character.name
        # [T, J, 3] global joint positions
        self.joints_pos = joints_pos
        # [T, J, 3, 3] global joint rotation matrices
        self.joints_rot = joints_rot
        assert joints_pos.shape[0] == joints_rot.shape[0]
        # keep track of local rots as well for convenience during pose editing
        self.joints_local_rot = global_rots_to_local_rots(joints_rot, self.skeleton)
        self.length = joints_pos.shape[0]
        self.cur_frame_idx = None
        self.foot_contacts = foot_contacts
        if foot_contacts is not None:
            assert foot_contacts.shape[0] == self.length
        self.precompute_mesh_info()
        # gizmos for pose editing
        self.root_translation_gizmo = None
        self.updating_root_translation_gizmo = False
        self.joint_gizmos = None
        self.updating_joint_gizmos = False
        self.gizmo_space: Literal["world", "local"] = "local"
        self._drag_start_world_rot: list = []
        self._joint_gizmo_dragging: list[bool] = []

    def precompute_mesh_info(self):
        if self.character.skeleton_mesh is not None:
            print("Caching skeleton mesh info...")
            self.character.skeleton_mesh.precompute_mesh_info(self.joints_pos)
        if self.character.skinned_mesh is not None:
            print("Caching skinning info...")
            self.character.precompute_skinning(self.joints_pos, self.joints_rot)

    def set_frame(self, idx: int):
        """Sets the pose of the character to the given frame index."""
        idx = min(idx, self.length - 1)  # clamp to last frame
        cur_foot_contacts = self.foot_contacts[idx] if self.foot_contacts is not None else None
        self.character.set_pose(
            self.joints_pos[idx],
            self.joints_rot[idx],
            frame_idx=idx,
            foot_contacts=cur_foot_contacts,
        )
        self.cur_frame_idx = idx
        # update gizmos if frame has changed due to playback
        cur_root_pos = self.joints_pos[self.cur_frame_idx, self.skeleton.root_idx].clone()
        cur_root_pos[1] = 0.0
        if self.root_translation_gizmo is not None and not self.updating_root_translation_gizmo:
            self.root_translation_gizmo.position = cur_root_pos.cpu().numpy()
        if self.joint_gizmos is not None:
            for i, joint_gizmo in enumerate(self.joint_gizmos):
                # Do not push wxyz/position while this gizmo is being dragged;
                # otherwise the client receives e.g. identity and the gizmo snaps back.
                if not self.updating_joint_gizmos and not self._joint_gizmo_dragging[i]:
                    joint_gizmo.position = self.joints_pos[self.cur_frame_idx, i].cpu().numpy()
                    if self.gizmo_space == "world":
                        joint_gizmo.wxyz = (1.0, 0.0, 0.0, 0.0)
                    else:
                        joint_gizmo.wxyz = tf.SO3.from_matrix(self.joints_rot[self.cur_frame_idx, i].cpu().numpy()).wxyz

    def update_pose_at_frame(
        self,
        frame_idx: int,
        joints_pos: Optional[torch.Tensor] = None,
        joints_rot: Optional[torch.Tensor] = None,
        joints_local_rot: Optional[torch.Tensor] = None,
        foot_contacts: Optional[torch.Tensor] = None,
    ):
        """Overwrites one or more of the pose components at the given frame.

        If only a subset of joints_pos, joints_rot, or joints_local_rot are provided,
        the other components will be updated with FK.
        """
        if joints_pos is not None:
            joints_pos = to_torch(joints_pos, device=self.joints_pos.device, dtype=self.joints_pos.dtype)
            self.joints_pos[frame_idx] = joints_pos
            if joints_local_rot is None and joints_rot is None:
                raise NotImplementedError("No IK to update joint rotations accordingly.")
        if joints_rot is not None:
            joints_rot = to_torch(joints_rot, device=self.joints_rot.device, dtype=self.joints_rot.dtype)
            self.joints_rot[frame_idx] = joints_rot
            if joints_local_rot is None:
                # update local rots from global rots
                self.joints_local_rot[frame_idx] = global_rots_to_local_rots(joints_rot, self.skeleton)
            if joints_pos is None:
                # need to update with FK
                new_posed_joints, _ = batch_rigid_transform(
                    self.joints_local_rot[frame_idx : frame_idx + 1],
                    self.skeleton.neutral_joints[None].to(self.joints_local_rot.device),
                    self.skeleton.joint_parents.to(self.joints_local_rot.device),
                    self.skeleton.root_idx,
                )
                new_posed_joints = (
                    new_posed_joints[0]
                    + self.joints_pos[frame_idx, self.skeleton.root_idx : self.skeleton.root_idx + 1]
                    - self.skeleton.neutral_joints[[self.skeleton.root_idx]]
                )
                self.joints_pos[frame_idx] = new_posed_joints
        if joints_local_rot is not None:
            joints_local_rot = to_torch(joints_local_rot, device=self.joints_local_rot.device).to(
                dtype=self.joints_local_rot.dtype
            )
            self.joints_local_rot[frame_idx] = joints_local_rot
            if joints_rot is None or joints_pos is None:
                # need to update with FK
                new_posed_joints, new_global_rots = batch_rigid_transform(
                    self.joints_local_rot[frame_idx : frame_idx + 1],
                    self.skeleton.neutral_joints[None].to(self.joints_local_rot.device),
                    self.skeleton.joint_parents.to(self.joints_local_rot.device),
                    self.skeleton.root_idx,
                )
                new_posed_joints = (
                    new_posed_joints[0]
                    + self.joints_pos[frame_idx, self.skeleton.root_idx : self.skeleton.root_idx + 1]
                    - self.skeleton.neutral_joints[[self.skeleton.root_idx]]
                )
                if joints_rot is None:
                    self.joints_rot[frame_idx] = new_global_rots[0]
                if joints_pos is None:
                    self.joints_pos[frame_idx] = new_posed_joints
        if foot_contacts is not None:
            foot_contacts = to_torch(foot_contacts, device=self.foot_contacts.device).to(dtype=self.foot_contacts.dtype)
            self.foot_contacts[frame_idx] = foot_contacts
        if self.character.skeleton_mesh is not None:
            self.character.skeleton_mesh.update_mesh_info_cache(self.joints_pos[frame_idx], frame_idx)
        if self.character.skinned_mesh is not None:
            self.character.update_skinning_cache(self.joints_pos[frame_idx], self.joints_rot[frame_idx], frame_idx)
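
    # Usage sketch (hedged): edit one frame and let the FK fallback documented
    # above fill in the rest. `cm` is a CharacterMotion instance; the joint
    # index 5 and the identity rotation are illustrative only:
    #   new_local = cm.joints_local_rot[10].clone()
    #   new_local[5] = torch.eye(3, device=new_local.device, dtype=new_local.dtype)
    #   cm.update_pose_at_frame(10, joints_local_rot=new_local)  # FK refreshes joints_pos/joints_rot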

    def clear(self):
        self.character.clear()

    #
    # Editing helpers
    #

    def get_current_projected_root_pos(self) -> np.ndarray:
        """Get the projected root position on the ground at the current frame."""
        root_pos = self.joints_pos[self.cur_frame_idx, self.skeleton.root_idx].clone()
        root_pos[1] = 0.0
        return to_numpy(root_pos)

    def get_projected_root_pos(self, start_frame_idx: int, end_frame_idx: int = None) -> np.ndarray:
        """If requested frames are out of range, simply pads with the last frame to get expected length."""
        if end_frame_idx is None:
            expected_len = 1
        else:
            expected_len = end_frame_idx - start_frame_idx + 1
        if start_frame_idx >= self.length:
            start_frame_idx = self.length - 1
        if end_frame_idx is None or expected_len == 1:
            root_pos = self.joints_pos[start_frame_idx, self.skeleton.root_idx].clone()
            root_pos[1] = 0.0
            return to_numpy(root_pos)
        else:
            if end_frame_idx >= self.length:
                end_frame_idx = self.length - 1
            root_pos = self.joints_pos[start_frame_idx : end_frame_idx + 1, self.skeleton.root_idx].clone()
            root_pos[:, 1] = 0.0
            if root_pos.shape[0] < expected_len:
                # pad with the last root position
                root_pos = torch.cat(
                    [
                        root_pos,
                        root_pos[-1:].repeat(expected_len - root_pos.shape[0], 1),
                    ],
                    dim=0,
                )
            return to_numpy(root_pos)

    def set_projected_root_pos_path(
        self,
        root_pos_path: np.ndarray | torch.Tensor,
        min_frame_idx: int = None,
        max_frame_idx: int = None,
    ):
        """Sets the projected root position path for the character motion.

        Can set only a subset of the path by providing min_frame_idx and max_frame_idx.
        If not provided, will set the full path.

        Args:
            root_pos_path: torch.Tensor, [T, 2] projected root positions
            min_frame_idx: int, optional, minimum frame index to set the path at
            max_frame_idx: int, optional, maximum frame index to set the path at
        """
        if min_frame_idx is not None or max_frame_idx is not None:
            assert (
                min_frame_idx is not None and max_frame_idx is not None
            ), "min_frame_idx and max_frame_idx must be provided if setting path at specific frames"
            if min_frame_idx >= self.length:
                # both are out of bounds
                return
            max_frame_idx = min(max_frame_idx, self.length - 1)
            root_pos_path = root_pos_path[min_frame_idx : max_frame_idx + 1]
        else:
            assert root_pos_path.shape[0] == self.length
            min_frame_idx = 0
            max_frame_idx = self.length - 1
        cur_joints_pos = self.joints_pos.clone()[min_frame_idx : max_frame_idx + 1]
        root_pos_tensor = to_torch(root_pos_path, device=cur_joints_pos.device, dtype=cur_joints_pos.dtype)
        diff = root_pos_tensor - cur_joints_pos[:, self.skeleton.root_idx, [0, 2]]
        cur_joints_pos[:, :, [0, 2]] += diff.unsqueeze(1)
        for frame_idx in range(min_frame_idx, max_frame_idx + 1):
            rel_idx = frame_idx - min_frame_idx
            self.update_pose_at_frame(
                frame_idx,
                joints_pos=cur_joints_pos[rel_idx],
                joints_rot=self.joints_rot[frame_idx],
                joints_local_rot=self.joints_local_rot[frame_idx],
            )
        # update immediately to show changes
        self.set_frame(self.cur_frame_idx)

    def get_joints_pos(self, start_frame_idx: int, end_frame_idx: int = None) -> np.ndarray:
        """If requested frames are out of range, simply pads with the last frame to get expected length."""
        if end_frame_idx is None:
            expected_len = 1
        else:
            expected_len = end_frame_idx - start_frame_idx + 1
        if start_frame_idx >= self.length:
            start_frame_idx = self.length - 1
        if end_frame_idx is None or expected_len == 1:
            return to_numpy(self.joints_pos[start_frame_idx].clone())
        else:
            if end_frame_idx >= self.length:
                end_frame_idx = self.length - 1
            return_joints_pos = self.joints_pos[start_frame_idx : end_frame_idx + 1].clone()
            if return_joints_pos.shape[0] < expected_len:
                # pad with the last pose
                return_joints_pos = torch.cat(
                    [
                        return_joints_pos,
                        return_joints_pos[-1:].repeat(expected_len - return_joints_pos.shape[0], 1, 1),
                    ],
                    dim=0,
                )
            return to_numpy(return_joints_pos)

    def get_joints_rot(self, start_frame_idx: int, end_frame_idx: int = None) -> np.ndarray:
        """If requested frames are out of range, simply pads with the last frame to get expected length."""
        if end_frame_idx is None:
            expected_len = 1
        else:
            expected_len = end_frame_idx - start_frame_idx + 1
        if start_frame_idx >= self.length:
            start_frame_idx = self.length - 1
        if end_frame_idx is None or expected_len == 1:
            return to_numpy(self.joints_rot[start_frame_idx].clone())
        else:
            if end_frame_idx >= self.length:
                end_frame_idx = self.length - 1
            return_joints_rot = self.joints_rot[start_frame_idx : end_frame_idx + 1].clone()
            if return_joints_rot.shape[0] < expected_len:
                # pad with the last pose
                return_joints_rot = torch.cat(
                    [
                        return_joints_rot,
                        return_joints_rot[-1:].repeat(expected_len - return_joints_rot.shape[0], 1, 1, 1),
                    ],
                    dim=0,
                )
            return to_numpy(return_joints_rot)

    def get_current_joints_pos(self) -> torch.Tensor:
        return self.joints_pos[self.cur_frame_idx].clone()

    def get_current_joints_rot(self) -> torch.Tensor:
        return self.joints_rot[self.cur_frame_idx].clone()

    def add_root_translation_gizmo(
        self,
        constraints: dict,
        on_2d_root_drag_end: Optional[Callable[[], None]] = None,
        on_drag_start: Optional[Callable[[], None]] = None,
    ):
        """Create and initialize gizmo to control the root translation.

        When the user drags the root 2D gizmo, path updates are skipped until release.
        Optional on_2d_root_drag_end is called when the drag ends (e.g. to refresh dense path).
        on_drag_start is called when the drag begins (e.g. to snapshot state for undo).
        """
        # TODO: could also allow rotation around y-axis
        self.root_translation_gizmo = self.server.scene.add_transform_controls(
            f"/{self.name}/gizmo_root_translation",
            scale=0.5,
            line_width=2.5,
            active_axes=(True, False, True),  # only allow translation on xz plane
            disable_axes=False,
            disable_sliders=False,
            disable_rotations=True,
            depth_test=False,  # render even when occluded
        )
        init_position = self.get_current_projected_root_pos()
        self.root_translation_gizmo.position = init_position

        @self.root_translation_gizmo.on_drag_start
        def _(_):
            if on_drag_start is not None:
                on_drag_start()

        @self.root_translation_gizmo.on_update
        def _(_):
            self.updating_root_translation_gizmo = True
            # translate to gizmo position
            new_root_pos = to_torch(
                self.root_translation_gizmo.position,
                device=self.joints_pos.device,
            ).to(dtype=self.joints_pos.dtype)
            cur_joints_pos = self.joints_pos[self.cur_frame_idx].clone()
            root_diff = new_root_pos - cur_joints_pos[self.skeleton.root_idx]
            root_diff[1] = 0.0  # don't change height
            cur_joints_pos += root_diff[None]
            self.update_pose_at_frame(
                self.cur_frame_idx,
                joints_pos=cur_joints_pos,
                joints_rot=self.joints_rot[self.cur_frame_idx],
                joints_local_rot=self.joints_local_rot[self.cur_frame_idx],
            )
            self.updating_root_translation_gizmo = False
            # update immediately to show user changes
            self.set_frame(self.cur_frame_idx)
            # update the 2D waypoint constraints as well if there is one
            if "2D Root" in constraints:
                root_2d_constraints = constraints["2D Root"]
                # if there is a constraint at that frame, we want to update it
                frame_idx = self.cur_frame_idx
                if frame_idx in root_2d_constraints.keyframes:
                    for keyframe_id in root_2d_constraints.frame2keyid[frame_idx]:
                        # add will modify the existing constraint
                        # update_path=False during drag to avoid lag; path refreshes on_drag_end
                        root_2d_constraints.add_keyframe(
                            keyframe_id,
                            frame_idx,
                            root_pos=new_root_pos,
                            exists_ok=True,
                            update_path=False,
                        )
            if "Full-Body" in constraints:
                full_body_constraints = constraints["Full-Body"]
                # if there is a constraint at that frame, we want to update it
                frame_idx = self.cur_frame_idx
                if frame_idx in full_body_constraints.keyframes:
                    current_dict = full_body_constraints.keyframes[frame_idx]
                    for keyframe_id in full_body_constraints.frame2keyid[frame_idx]:
                        # add will modify the existing constraint
                        full_body_constraints.add_keyframe(
                            keyframe_id,
                            frame_idx,
                            joints_pos=cur_joints_pos,
                            joints_rot=current_dict["joints_rot"],
                            exists_ok=True,
                        )
            if "End-Effectors" in constraints:
                end_effector_constraints = constraints["End-Effectors"]
                # if there is a constraint at that frame, we want to update it
                frame_idx = self.cur_frame_idx
                if frame_idx in end_effector_constraints.keyframes:
                    current_dict = end_effector_constraints.keyframes[frame_idx]
                    for keyframe_id, _ in end_effector_constraints.frame2keyid[frame_idx]:
                        # add will modify the existing constraint
                        end_effector_constraints.add_keyframe(
                            keyframe_id,
                            frame_idx,
                            joints_pos=cur_joints_pos,
                            joints_rot=current_dict["joints_rot"],
                            joint_names=current_dict["joint_names"],
                            end_effector_type=current_dict["end_effector_type"],
                            exists_ok=True,
                        )

        @self.root_translation_gizmo.on_drag_end
        def _on_drag_end(_):
            # Refresh path visualization and dense path after release.
            if "2D Root" in constraints:
                root_2d = constraints["2D Root"]
                if root_2d.line_segments is not None:
                    root_2d.update_line_segments()
            if on_2d_root_drag_end is not None:
                on_2d_root_drag_end()
active_axes = ( axis_idx != 0, axis_idx != 1, axis_idx != 2, ) joint_visible = True if hidden_gizmo_joints is not None: joint_name = self.skeleton.bone_order_names[joint_idx] joint_visible = joint_name not in hidden_gizmo_joints cur_joint_gizmo = self.server.scene.add_transform_controls( f"/{self.name}/gizmo_joint_{joint_idx}", scale=0.075, line_width=4.0, active_axes=active_axes, disable_axes=disable_axes, disable_sliders=disable_sliders, disable_rotations=False, depth_test=False, # render even when occluded position=self.joints_pos[self.cur_frame_idx, joint_idx].cpu().numpy(), wxyz=joints_wxyzs[joint_idx], visible=joint_visible, space=space, ) self.joint_gizmos.append(cur_joint_gizmo) def set_callback_in_closure(i: int) -> None: @cur_joint_gizmo.on_drag_start def _on_drag_start(_) -> None: if on_drag_start is not None: on_drag_start() self._joint_gizmo_dragging[i] = True if self.gizmo_space == "world": self._drag_start_world_rot[i] = self.joints_rot[self.cur_frame_idx, i].clone().cpu().numpy() @cur_joint_gizmo.on_drag_end def _on_drag_end(_) -> None: self._joint_gizmo_dragging[i] = False # Force-sync so the client always receives the reset (viser setter skips on allclose). # Use self.joint_gizmos[i] (not cur_joint_gizmo) to avoid the # closure-in-loop bug: cur_joint_gizmo would point to the last handle. gizmo = self.joint_gizmos[i] gizmo.sync_position(self.joints_pos[self.cur_frame_idx, i].cpu().numpy()) if self.gizmo_space == "world": gizmo.sync_wxyz((1.0, 0.0, 0.0, 0.0)) else: gizmo.sync_wxyz(tf.SO3.from_matrix(self.joints_rot[self.cur_frame_idx, i].cpu().numpy()).wxyz) self.set_frame(self.cur_frame_idx) @cur_joint_gizmo.on_update def _(_) -> None: self.updating_joint_gizmos = True new_local_joint_rots = self.joints_local_rot[self.cur_frame_idx].clone() # Gizmo parent is identity; client sends rotation as wxyz. # World mode: wxyz is cumulative from identity, compose with # stored initial world rotation. Local mode: wxyz is new world rotation. gizmo_rot_mat = tf.SO3(self.joint_gizmos[i].wxyz).as_matrix() if self.gizmo_space == "world" and self._drag_start_world_rot[i] is not None: new_world_rot_mat = gizmo_rot_mat @ self._drag_start_world_rot[i] else: new_world_rot_mat = gizmo_rot_mat parent_idx = self.skeleton.joint_parents[i].item() if parent_idx >= 0: R_parent_world = self.joints_rot[self.cur_frame_idx, parent_idx].detach().cpu().numpy() new_local_rot_mat_np = (R_parent_world.T @ new_world_rot_mat).astype(np.float32) else: new_local_rot_mat_np = new_world_rot_mat.astype(np.float32) new_local_rot = tf.SO3.from_matrix(new_local_rot_mat_np) joint_name = self.skeleton.bone_order_names[i] if joint_f2q_data is not None and joint_name in joint_f2q_data: # G1 hinge: use offset (f2q) space so 1-DoF and limits match the robot. # R_f2q = offset_f2q @ R_local; angle_f2q = dot(axis_angle(R_f2q), axis_f2q); # MuJoCo q = angle_f2q - rest_dof; limits apply to q. f2q = joint_f2q_data[joint_name] offset_f2q = f2q["offset_f2q"] axis_f2q = f2q["axis_f2q"] rest_dof = f2q["rest_dof_axis_angle"] R_local = new_local_rot_mat_np.astype(np.float64) R_f2q = offset_f2q @ R_local rotvec = tf.SO3.from_matrix(R_f2q).log() angle_f2q = float(np.dot(rotvec, axis_f2q)) # Keep angle continuous relative to current pose. 
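# Worked example (added, illustrative numbers): if the current pose sits at
# current_angle_f2q = +3.0 rad and the fresh log() returns angle_f2q = -3.1 rad,
# then round((3.0 - (-3.1)) / (2*pi)) = round(0.97) = 1, so the unwrapped angle
# becomes -3.1 + 2*pi ~= +3.18 rad: the equivalent angle closest to the current
# pose, avoiding a sudden flip across the +/-pi boundary of log().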
current_R_f2q = offset_f2q @ ( self.joints_local_rot[self.cur_frame_idx, i].detach().cpu().numpy().astype(np.float64) ) current_angle_f2q = float(np.dot(tf.SO3.from_matrix(current_R_f2q).log(), axis_f2q)) two_pi = 2.0 * np.pi angle_f2q = angle_f2q + two_pi * np.round((current_angle_f2q - angle_f2q) / two_pi) q = angle_f2q - rest_dof if joint_limits is not None: joint_limit = joint_limits.get(joint_name) if joint_limit is not None: q = float(np.clip(q, joint_limit[0], joint_limit[1])) angle_f2q = q + rest_dof R_f2q_new = tf.SO3.exp(angle_f2q * axis_f2q).as_matrix() new_local_rot_mat_np = (offset_f2q.T @ R_f2q_new).astype(np.float32) elif joint_axis_indices is not None: axis_idx = joint_axis_indices.get(joint_name) if axis_idx is not None: rotvec = new_local_rot.log() axis = np.zeros(3, dtype=np.float64) axis[axis_idx] = 1.0 angle = float(rotvec[axis_idx]) # Keep angle continuous relative to current pose. current_rot = tf.SO3.from_matrix( self.joints_local_rot[self.cur_frame_idx, i].detach().cpu().numpy() ) current_angle = float(current_rot.log()[axis_idx]) two_pi = 2.0 * np.pi angle = angle + two_pi * np.round((current_angle - angle) / two_pi) if joint_limits is not None: joint_limit = joint_limits.get(joint_name) if joint_limit is not None: angle = float(np.clip(angle, joint_limit[0], joint_limit[1])) new_local_rot_mat_np = tf.SO3.exp(angle * axis).as_matrix() new_local_rot_mat = torch.tensor(new_local_rot_mat_np).to(new_local_joint_rots.device) new_local_joint_rots[i] = new_local_rot_mat self.update_pose_at_frame( self.cur_frame_idx, joints_local_rot=new_local_joint_rots, ) # handle root translation separately cur_joints_pos = self.joints_pos[self.cur_frame_idx].clone() if i == self.skeleton.root_idx: new_root_pos = to_torch( self.joint_gizmos[i].position, device=self.joints_pos.device, ).to(dtype=self.joints_pos.dtype) root_diff = new_root_pos - self.joints_pos[self.cur_frame_idx, i] if torch.norm(root_diff) > 1e-3: # the root translation has been changed # translate to gizmo position cur_joints_pos += root_diff[None] self.update_pose_at_frame( self.cur_frame_idx, joints_pos=cur_joints_pos, joints_rot=self.joints_rot[self.cur_frame_idx], joints_local_rot=self.joints_local_rot[self.cur_frame_idx], ) # update immediately to show user changes. Keep updating_joint_gizmos # True so set_frame does not overwrite gizmo wxyz mid-drag. 
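# Added note: the ordering below is deliberate. updating_joint_gizmos is cleared
# only after set_frame returns; if it were cleared first, set_frame would push a
# recomputed wxyz/position to the client mid-drag and the gizmo would visibly
# snap back to the last committed pose.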
self.set_frame(self.cur_frame_idx) self.updating_joint_gizmos = False if i == self.skeleton.root_idx: # update the 2D waypoint constraints as well if there is one if "2D Root" in constraints: root_2d_constraints = constraints["2D Root"] # if there is a constraint at that frame, we want to update it frame_idx = self.cur_frame_idx if frame_idx in root_2d_constraints.keyframes: new_root_pos[1] = 0.0 # force y to 0 for keyframe_id in root_2d_constraints.frame2keyid[frame_idx]: # add will modify the existing constraint root_2d_constraints.add_keyframe( keyframe_id, frame_idx, root_pos=new_root_pos, exists_ok=True, update_path=False, ) if "Full-Body" in constraints: full_body_constraints = constraints["Full-Body"] # if there is a constraint at that frame, we want to update it frame_idx = self.cur_frame_idx if frame_idx in full_body_constraints.keyframes: for keyframe_id in full_body_constraints.frame2keyid[frame_idx]: # add will modify the existing constraint full_body_constraints.add_keyframe( keyframe_id, frame_idx, joints_pos=self.joints_pos[frame_idx], joints_rot=self.joints_rot[frame_idx], exists_ok=True, ) if "End-Effectors" in constraints: end_effector_constraints = constraints["End-Effectors"] # if there is a constraint at that frame, we want to update it frame_idx = self.cur_frame_idx if frame_idx in end_effector_constraints.keyframes: current_dict = end_effector_constraints.keyframes[frame_idx] for keyframe_id, _ in end_effector_constraints.frame2keyid[frame_idx]: # add will modify the existing constraint end_effector_constraints.add_keyframe( keyframe_id, frame_idx, joints_pos=self.joints_pos[frame_idx], joints_rot=self.joints_rot[frame_idx], joint_names=current_dict["joint_names"], end_effector_type=current_dict["end_effector_type"], exists_ok=True, ) set_callback_in_closure(joint_idx) def clear_all_gizmos(self): self.updating_root_translation_gizmo = True self.updating_joint_gizmos = True if self.root_translation_gizmo is not None: self.server.scene.remove_by_name(self.root_translation_gizmo.name) self.root_translation_gizmo = None if self.joint_gizmos is not None: for joint_gizmo in self.joint_gizmos: self.server.scene.remove_by_name(joint_gizmo.name) self.joint_gizmos = None self._drag_start_world_rot = [] self._joint_gizmo_dragging = [] self.updating_root_translation_gizmo = False self.updating_joint_gizmos = False ================================================ FILE: kimodo/viz/scene.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 """Viser scene entities: waypoints, skeleton mesh, and character.""" import os import traceback from pathlib import Path from typing import Optional, Tuple import numpy as np import torch import trimesh import viser import viser.transforms as tf from kimodo.skeleton import ( G1Skeleton34, SkeletonBase, SMPLXSkeleton22, SOMASkeleton30, SOMASkeleton77, ) from .coords import rotation_matrix_from_two_vec from .g1_rig import ( G1MeshRig, ) from .smplx_skin import SMPLXSkin from .soma_skin import SOMASkin class WaypointMesh: def __init__( self, name: str, server: viser.ViserServer, position: np.ndarray, heading: Optional[np.ndarray] = None, color: Optional[Tuple[int, int, int]] = (255, 0, 0), ): self.server = server sphere = trimesh.creation.icosphere(subdivisions=3, radius=0.025) annulus = trimesh.creation.annulus(r_min=0.1, r_max=0.2, height=0.005) z_to_y_up = np.array([[1, 0, 0], [0, 0, 1], [0, -1, 0]]) annulus_vertices = annulus.vertices @ z_to_y_up self.sphere = self.server.scene.add_mesh_simple( name=f"{name}/sphere", vertices=sphere.vertices, faces=sphere.faces, position=position, color=color, ) self.annulus = self.server.scene.add_mesh_simple( name=f"{name}/annulus", vertices=annulus_vertices, faces=annulus.faces, position=position, color=color, ) self.arrow_base = None self.arrow_head = None if heading is not None: assert heading.shape == (2,), "Heading must be a 2D vector" heading = 0.3 * (heading / np.linalg.norm(heading)) heading_3d = np.array([heading[0], 0, heading[1]]) arrow_base = trimesh.creation.cylinder(radius=0.01, height=0.3) arrow_head = trimesh.creation.cone(radius=0.03, height=0.075) arrow_base_vertices = arrow_base.vertices arrow_head_vertices = arrow_head.vertices self.arrow_base = self.server.scene.add_mesh_simple( name=f"{name}/arrow_base", vertices=arrow_base_vertices, faces=arrow_base.faces, position=position + (heading_3d / 2), color=color, ) self.arrow_head = self.server.scene.add_mesh_simple( name=f"{name}/arrow_head", vertices=arrow_head_vertices, faces=arrow_head.faces, position=position + heading_3d, color=color, ) def update_position(self, position: np.ndarray, heading: Optional[np.ndarray] = None): self.sphere.position = position self.annulus.position = position if heading is not None: assert heading.shape == (2,), "Heading must be a 2D vector" heading = 0.3 * (heading / np.linalg.norm(heading)) heading_3d = np.array([heading[0], 0, heading[1]]) if self.arrow_base is not None: self.arrow_base.position = position + (heading_3d / 2) if self.arrow_head is not None: self.arrow_head.position = position + heading_3d def clear(self): self.server.scene.remove_by_name(self.sphere.name) self.server.scene.remove_by_name(self.annulus.name) if self.arrow_base is not None: self.server.scene.remove_by_name(self.arrow_base.name) if self.arrow_head is not None: self.server.scene.remove_by_name(self.arrow_head.name) def set_visible(self, visible: bool) -> None: self.sphere.visible = visible self.annulus.visible = visible if self.arrow_base is not None: self.arrow_base.visible = visible if self.arrow_head is not None: self.arrow_head.visible = visible class SkeletonMesh: def __init__( self, name: str, server: viser.ViserServer, skeleton: SkeletonBase, joint_color: Optional[Tuple[float, float, float] | np.ndarray] = ( 255, 235, 0, ), bone_color: Optional[Tuple[float, float, float] | np.ndarray] = ( 27, 106, 0, ), starting_joints_pos: Optional[torch.Tensor] = None, ): """ name: str, name of the skeleton mesh server: viser.ViserServer, server 
to add the skeleton mesh to skeleton: SkeletonBase, skeleton to visualize joint_color: Optional[Tuple[float, float, float] | np.ndarray], color of the joints bone_color: Optional[Tuple[float, float, float] | np.ndarray], color of the bones starting_joints_pos: Optional[torch.Tensor], starting joint positions """ self.server = server self.skeleton = skeleton joint_mesh = trimesh.creation.icosphere(subdivisions=3, radius=0.02) bone_mesh = trimesh.creation.cylinder(radius=0.01, height=1.0) init_joints_pos = skeleton.neutral_joints.clone() self.num_joints = init_joints_pos.shape[0] num_bones = self.num_joints - 1 non_root_bones = [ joint_name for joint_name, parent_name in self.skeleton.bone_order_names_with_parents if parent_name is not None ] self.bone_to_idx = {bone_name: idx for idx, bone_name in enumerate(non_root_bones)} # initialize meshes init_joints_wxyzs = np.concatenate([np.ones((self.num_joints, 1)), np.zeros((self.num_joints, 3))], axis=1) if isinstance(joint_color, tuple): self.joint_colors = np.full((self.num_joints, 3), joint_color) elif isinstance(joint_color, np.ndarray): assert joint_color.shape == ( self.num_joints, 3, ), "Joint colors must be (J, 3)" self.joint_colors = joint_color joint_scales = np.ones((self.num_joints, 3)) hand_roots = {"LeftHand", "RightHand"} finger_joint_names = set(skeleton.left_hand_joint_names + skeleton.right_hand_joint_names) - hand_roots for jname in finger_joint_names: if jname in skeleton.bone_index: joint_scales[skeleton.bone_index[jname]] = 0.6 self.joint_scales = joint_scales self.joints_batched_mesh = server.scene.add_batched_meshes_simple( f"{name}/joints", vertices=joint_mesh.vertices, faces=joint_mesh.faces, batched_wxyzs=init_joints_wxyzs, batched_positions=np.zeros((self.num_joints, 3)), batched_scales=joint_scales, batched_colors=self.joint_colors, ) init_bones_wxyzs = np.concatenate([np.ones((num_bones, 1)), np.zeros((num_bones, 3))], axis=1) if isinstance(bone_color, tuple): bone_color = np.full((num_bones, 3), bone_color) elif isinstance(bone_color, np.ndarray): assert bone_color.shape == (num_bones, 3), "Bone colors must be (J-1, 3)" self.bones_batched_mesh = server.scene.add_batched_meshes_simple( f"{name}/bones", vertices=bone_mesh.vertices, faces=bone_mesh.faces, batched_wxyzs=init_bones_wxyzs, batched_positions=np.zeros((num_bones, 3)), batched_scales=np.ones((num_bones, 3)), batched_colors=bone_color, ) self.mesh_info_cache = None if starting_joints_pos is not None: self.set_pose(starting_joints_pos) else: if isinstance(skeleton, SOMASkeleton77): skel30 = SOMASkeleton30(load=True) min_height = skel30.neutral_joints[:, 1].min().item() else: min_height = init_joints_pos[:, 1].min().item() init_joints_pos[:, 1] -= min_height # move to be on ground self.set_pose(init_joints_pos) def compute_single_pose(self, joints_pos: np.ndarray): """Compute the mesh for a single frame. joints_pos: [J, 3] global joint positions.
""" new_batched_positions = np.zeros((self.skeleton.nbjoints - 1, 3)) new_batched_wxyzs = np.zeros((self.skeleton.nbjoints - 1, 4)) new_batched_scales = np.ones((self.skeleton.nbjoints - 1, 3)) for joint_name, parent_name in self.skeleton.bone_order_names_with_parents: if parent_name is None: continue joint_idx = self.skeleton.bone_index[joint_name] parent_idx = self.skeleton.bone_index[parent_name] joint_pos = joints_pos[joint_idx] parent_pos = joints_pos[parent_idx] bone_pos = (joint_pos + parent_pos) / 2.0 bone_scale = np.linalg.norm(joint_pos - parent_pos) if bone_scale < 1e-8: bone_wxyz = np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float64) else: bone_dir = (joint_pos - parent_pos) / bone_scale R = rotation_matrix_from_two_vec(np.array([0.0, 0.0, 1.0], dtype=np.float64), bone_dir) bone_wxyz = tf.SO3.from_matrix(R).wxyz bone_idx = self.bone_to_idx[joint_name] new_batched_positions[bone_idx] = bone_pos new_batched_wxyzs[bone_idx] = bone_wxyz new_batched_scales[bone_idx] = np.array([1.0, 1.0, bone_scale], dtype=float) return new_batched_positions, new_batched_wxyzs, new_batched_scales def precompute_mesh_info(self, joints_pos: torch.Tensor): """Precompute the meshes for all frames at once. joints_pos: [T, J, 3]. """ joints_pos = joints_pos.cpu().numpy() num_frames = joints_pos.shape[0] self.mesh_info_cache = { "positions": np.zeros((num_frames, self.skeleton.nbjoints - 1, 3)), "wxyzs": np.zeros((num_frames, self.skeleton.nbjoints - 1, 4)), "scales": np.ones((num_frames, self.skeleton.nbjoints - 1, 3)), } for i in range(num_frames): new_batched_positions, new_batched_wxyzs, new_batched_scales = self.compute_single_pose(joints_pos[i]) self.mesh_info_cache["positions"][i] = new_batched_positions self.mesh_info_cache["wxyzs"][i] = new_batched_wxyzs self.mesh_info_cache["scales"][i] = new_batched_scales def update_mesh_info_cache(self, joints_pos: torch.Tensor, frame_idx: int): """Update the mesh info cache for the given frame.""" assert self.mesh_info_cache is not None new_batched_positions, new_batched_wxyzs, new_batched_scales = self.compute_single_pose( joints_pos.cpu().numpy() ) self.mesh_info_cache["positions"][frame_idx] = new_batched_positions self.mesh_info_cache["wxyzs"][frame_idx] = new_batched_wxyzs self.mesh_info_cache["scales"][frame_idx] = new_batched_scales def set_pose( self, joints_pos: torch.Tensor, foot_contacts: Optional[torch.Tensor] = None, frame_idx: Optional[int] = None, ): """Set pose from [J, 3] global joint positions.""" self.cur_joints_pos = joints_pos joints_pos = joints_pos.cpu().numpy() if self.mesh_info_cache is not None: assert frame_idx is not None new_batched_positions = self.mesh_info_cache["positions"][frame_idx] new_batched_wxyzs = self.mesh_info_cache["wxyzs"][frame_idx] new_batched_scales = self.mesh_info_cache["scales"][frame_idx] else: new_batched_positions, new_batched_wxyzs, new_batched_scales = self.compute_single_pose(joints_pos) self.bones_batched_mesh.batched_positions = new_batched_positions self.bones_batched_mesh.batched_wxyzs = new_batched_wxyzs self.bones_batched_mesh.batched_scales = new_batched_scales self.joints_batched_mesh.batched_positions = joints_pos if foot_contacts is not None: cur_joint_colors = self.joint_colors.copy() foot_contacts = foot_contacts.bool().cpu().numpy().astype(bool) foot_joints = np.array(self.skeleton.foot_joint_idx, dtype=int) contact_idx = foot_joints[foot_contacts] cur_joint_colors[contact_idx] = (255, 0, 0) self.joints_batched_mesh.batched_colors = cur_joint_colors else: self.joints_batched_mesh.batched_colors 
= self.joint_colors def set_visibility(self, visible: bool): self.joints_batched_mesh.visible = visible self.bones_batched_mesh.visible = visible def get_pose(self) -> np.ndarray: return self.cur_joints_pos def clear(self): names = [mesh.name for mesh in [self.joints_batched_mesh, self.bones_batched_mesh]] for name in names: self.server.scene.remove_by_name(name) LIGHT_THEME = dict( mesh=(152, 189, 255), ) DARK_THEME = dict( mesh=(100, 135, 195), ) SKIN_CACHE = {} class Character: def __init__( self, name: str, server: viser.ViserServer | viser.ClientHandle, skeleton: SkeletonBase, create_skeleton_mesh: bool = True, create_skinned_mesh: bool = True, visible_skeleton: bool = False, visible_skinned_mesh: bool = True, skinned_mesh_opacity: float = 1.0, show_foot_contacts: bool = True, dark_mode: bool = False, mesh_mode: Optional[str] = None, gui_use_soma_layer_checkbox: Optional[viser.GuiCheckboxHandle] = None, ): self.server = server self.name = name self.skeleton = skeleton self.cur_joints_pos = None self.cur_joints_rot = None self.cur_foot_contacts = None self.skeleton_mesh = None self.show_foot_contacts = show_foot_contacts if create_skeleton_mesh: self.skeleton_mesh = SkeletonMesh(f"/{name}/skeleton", server, skeleton) self.cur_joints_pos = self.skeleton_mesh.get_pose() self.skeleton_mesh.set_visibility(visible_skeleton) self.skinned_mesh = None self.skin = None self.mesh_mode = mesh_mode self.g1_mesh_rig = None if create_skinned_mesh: if isinstance(self.skeleton, (SOMASkeleton30, SOMASkeleton77)) and mesh_mode in [ "soma_skin", "soma_layer_skin", ]: if mesh_mode in SKIN_CACHE: # already okay pass else: if mesh_mode == "soma_layer_skin": try: # try importing the lib from .soma_layer_skin import SOMASkin as SOMASkin_SOMA if mesh_mode not in SKIN_CACHE: SKIN_CACHE[mesh_mode] = SOMASkin_SOMA(self.skeleton) except (ModuleNotFoundError, FileNotFoundError) as e: if isinstance(e, ModuleNotFoundError): msg = "SOMA layer skin is unavailable: the soma package is not installed." else: msg = "SOMA layer skin is unavailable: SOMA asset files are missing." 
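# Added note: this except block degrades gracefully instead of aborting: the
# traceback is printed for debugging, the client gets a notification, the GUI
# checkbox is reset so it reflects the active mode, and mesh_mode falls back to
# the bundled "soma_skin" path handled just below.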
traceback.print_exc() if hasattr(self.server, "add_notification"): self.server.add_notification( "SOMA layer skin unavailable", msg, auto_close_seconds=5.0, with_close_button=True, ) if gui_use_soma_layer_checkbox is not None: gui_use_soma_layer_checkbox.value = False mesh_mode = "soma_skin" # another if, in case mesh_mode changed if mesh_mode == "soma_skin" and mesh_mode not in SKIN_CACHE: SKIN_CACHE[mesh_mode] = SOMASkin(self.skeleton) self.skin = SKIN_CACHE[mesh_mode] self.skinned_mesh = server.scene.add_mesh_simple( f"/{name}/simple_skinned", vertices=self.skin.bind_vertices.cpu().numpy(), faces=self.skin.faces.cpu().numpy(), opacity=None, color=LIGHT_THEME["mesh"] if not dark_mode else DARK_THEME["mesh"], wireframe=False, visible=False, ) self.skinned_verts_cache = None bind_pos = self.skeleton.neutral_joints.clone() if isinstance(self.skeleton, SOMASkeleton77): skel30 = SOMASkeleton30(load=True) min_height = skel30.neutral_joints[:, 1].min().item() else: min_height = bind_pos[:, 1].min().item() bind_pos[:, 1] -= min_height bind_pos[:, 1] += 0.02 bind_rotmat = torch.eye(3, device=bind_pos.device).repeat(bind_pos.shape[0], 1, 1) self.set_pose(bind_pos, bind_rotmat) self.skinned_mesh.visible = True self.set_skinned_mesh_visibility(visible_skinned_mesh) self.set_skinned_mesh_opacity(skinned_mesh_opacity) elif isinstance(self.skeleton, SMPLXSkeleton22) and mesh_mode == "smplx_skin": if mesh_mode not in SKIN_CACHE: SKIN_CACHE[mesh_mode] = SMPLXSkin(self.skeleton) self.skin = SKIN_CACHE[mesh_mode] self.skinned_mesh = server.scene.add_mesh_simple( f"/{name}/simple_skinned", vertices=self.skin.bind_vertices.cpu().numpy(), faces=self.skin.faces.cpu().numpy(), opacity=None, color=LIGHT_THEME["mesh"] if not dark_mode else DARK_THEME["mesh"], wireframe=False, visible=False, ) self.skinned_verts_cache = None bind_pos = self.skeleton.neutral_joints.clone() min_height = bind_pos[:, 1].min().item() bind_pos[:, 1] -= min_height bind_rotmat = torch.eye(3, device=bind_pos.device).repeat(bind_pos.shape[0], 1, 1) self.set_pose(bind_pos, bind_rotmat) self.skinned_mesh.visible = True self.set_skinned_mesh_visibility(visible_skinned_mesh) self.set_skinned_mesh_opacity(skinned_mesh_opacity) elif isinstance(self.skeleton, G1Skeleton34) and mesh_mode == "g1_stl": g1_mesh_dir = Path(self.skeleton.folder) / "meshes/g1" if not os.path.exists(g1_mesh_dir): raise ValueError(f"G1 mesh directory not found: {g1_mesh_dir}") self.g1_mesh_rig = G1MeshRig( name, server, self.skeleton, str(g1_mesh_dir), DARK_THEME["mesh"] if dark_mode else LIGHT_THEME["mesh"], ) init_joints_rot = self.skeleton.rest_pose_local_rot.clone() init_global_joint_rots, _, init_joints_pos = self.skeleton.fk( init_joints_rot, torch.zeros(3, device=init_joints_rot.device, dtype=init_joints_rot.dtype), ) min_height = init_joints_pos[:, 1].min().item() init_joints_pos[:, 1] -= min_height self.set_pose(init_joints_pos, init_global_joint_rots) self.set_skinned_mesh_visibility(visible_skinned_mesh) self.set_skinned_mesh_opacity(skinned_mesh_opacity) else: raise ValueError( "Unsupported mesh mode for skeleton type: " f"{type(self.skeleton).__name__} with mesh_mode={mesh_mode}" ) def change_theme(self, is_dark_mode): color = DARK_THEME["mesh"] if is_dark_mode else LIGHT_THEME["mesh"] if self.skinned_mesh is not None: self.skinned_mesh.color = color if self.g1_mesh_rig is not None: self.g1_mesh_rig.set_color(color) def set_skeleton_visibility(self, visible: bool): if self.skeleton_mesh is not None: self.skeleton_mesh.set_visibility(visible) def 
set_show_foot_contacts(self, show: bool, frame_idx: Optional[int] = None): self.show_foot_contacts = show if self.skeleton_mesh is not None and self.cur_joints_pos is not None: fc = self.cur_foot_contacts if show else None self.skeleton_mesh.set_pose(self.cur_joints_pos, foot_contacts=fc, frame_idx=frame_idx) def set_skinned_mesh_visibility(self, visible: bool): if self.skinned_mesh is not None: self.skinned_mesh.visible = visible if self.g1_mesh_rig is not None: self.g1_mesh_rig.set_visibility(visible) def set_skinned_mesh_opacity(self, opacity: float): if self.skinned_mesh is not None: self.skinned_mesh.opacity = opacity if self.g1_mesh_rig is not None: self.g1_mesh_rig.set_opacity(opacity) def set_skinned_mesh_wireframe(self, wireframe: bool): if self.skinned_mesh is not None: self.skinned_mesh.wireframe = wireframe if self.g1_mesh_rig is not None: self.g1_mesh_rig.set_wireframe(wireframe) def precompute_skinning(self, joints_pos: torch.Tensor, joints_rot: torch.Tensor, chunk_size: int = 64): """Precompute skinning for all frames, processing in chunks to avoid OOM. joints_pos: [T, J, 3], joints_rot: [T, J, 3, 3]. The LBS gather intermediate is ~V*W*48 bytes per frame (V=18k, W=8 for SOMA gives ~7 MB/frame), so a chunk of 64 peaks around ~1 GB -- safe alongside a loaded text encoder + diffusion model on a typical 24 GB GPU. """ assert self.skin is not None T = joints_pos.shape[0] with torch.no_grad(): if T <= chunk_size: self.skinned_verts_cache = self.skin.skin(joints_rot, joints_pos, rot_is_global=True).cpu().numpy() else: chunks = [] for start in range(0, T, chunk_size): end = min(start + chunk_size, T) verts = self.skin.skin(joints_rot[start:end], joints_pos[start:end], rot_is_global=True).cpu().numpy() chunks.append(verts) self.skinned_verts_cache = np.concatenate(chunks, axis=0) def update_skinning_cache(self, joints_pos: torch.Tensor, joints_rot: torch.Tensor, frame_idx: int): """Update skinning cache for one frame.""" if self.skinned_verts_cache is None: return with torch.no_grad(): new_skinned_verts = self.skin.skin(joints_rot[None], joints_pos[None], rot_is_global=True)[0].cpu().numpy() self.skinned_verts_cache[frame_idx] = new_skinned_verts def set_pose( self, joints_pos: torch.Tensor, joints_rot: torch.Tensor, foot_contacts: Optional[torch.Tensor] = None, frame_idx: Optional[int] = None, ): if self.skeleton_mesh is not None: self.cur_foot_contacts = foot_contacts display_fc = foot_contacts if self.show_foot_contacts else None self.skeleton_mesh.set_pose(joints_pos, foot_contacts=display_fc, frame_idx=frame_idx) if self.skinned_mesh is not None: if self.skinned_verts_cache is not None: assert frame_idx is not None skinned_verts = self.skinned_verts_cache[frame_idx] else: with torch.no_grad(): skinned_verts = self.skin.skin(joints_rot[None], joints_pos[None], rot_is_global=True)[0].cpu().numpy() self.skinned_mesh.vertices = skinned_verts if self.g1_mesh_rig is not None: joints_pos_np = joints_pos.detach().cpu().numpy() joints_rot_np = joints_rot.detach().cpu().numpy() self.g1_mesh_rig.set_pose(joints_pos_np, joints_rot_np) self.cur_joints_pos = joints_pos self.cur_joints_rot = joints_rot def get_pose(self) -> Tuple[torch.Tensor, torch.Tensor]: return self.cur_joints_pos, self.cur_joints_rot def clear(self): if self.skeleton_mesh is not None: self.skeleton_mesh.clear() if self.skinned_mesh is not None: self.server.scene.remove_by_name(self.skinned_mesh.name) if self.g1_mesh_rig is not None: self.g1_mesh_rig.clear() ================================================ FILE: kimodo/viz/smplx_skin.py
================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """SMPL-X skinning and joint mapping for visualization.""" import os import warnings from pathlib import Path import numpy as np import torch from kimodo.geometry import axis_angle_to_matrix from kimodo.skeleton import SMPLXSkeleton22, batch_rigid_transform SKIN_NAME = "SMPLX_NEUTRAL.npz" BETA_NAME = "beta.npy" MEAN_HANDS_NAME = "mean_hands.npy" SMPLX_BODY_JOINT_NAME_MAP = { "pelvis": "Pelvis", "left_hip": "L_Hip", "right_hip": "R_Hip", "spine1": "Spine1", "left_knee": "L_Knee", "right_knee": "R_Knee", "spine2": "Spine2", "left_ankle": "L_Ankle", "right_ankle": "R_Ankle", "spine3": "Spine3", "left_foot": "L_Foot", "right_foot": "R_Foot", "neck": "Neck", "left_collar": "L_Collar", "right_collar": "R_Collar", "head": "Head", "left_shoulder": "L_Shoulder", "right_shoulder": "R_Shoulder", "left_elbow": "L_Elbow", "right_elbow": "R_Elbow", "left_wrist": "L_Wrist", "right_wrist": "R_Wrist", } # SMPL-X hand pose order (15 joints per hand) matching SMPL-X index order. SMPLX_HAND_JOINT_ORDER = [ "Index1", "Index2", "Index3", "Middle1", "Middle2", "Middle3", "Pinky1", "Pinky2", "Pinky3", "Ring1", "Ring2", "Ring3", "Thumb1", "Thumb2", "Thumb3", ] SMPLX_FACE_JOINT_NAMES = ["Jaw", "L_Eye", "R_Eye"] class SMPLXSkin: def __init__( self, skeleton, use_mean_hands=True, ): skel_dir = Path(skeleton.folder) skin_data_path = skel_dir / SKIN_NAME if not skin_data_path.exists(): raise FileNotFoundError( f"Download {SKIN_NAME} from the SMPL-X website and place it at: {skin_data_path}" ) beta_path = skel_dir / BETA_NAME mean_hands_path = skel_dir / MEAN_HANDS_NAME self.skeleton = skeleton assert isinstance(skeleton, SMPLXSkeleton22), "SMPLXSkin only supports SMPLXSkeleton22" assert skeleton.neutral_joints is not None, "SMPLXSkeleton22 must have neutral joints instantiated" device = skeleton.neutral_joints.device with warnings.catch_warnings(): # Ignore legacy object-dtype warning emitted while unpickling old SMPL-X assets. warnings.filterwarnings( "ignore", message=r"dtype\(\): align should be passed as Python or NumPy boolean.*", category=Warning, module=r"numpy\.lib\._format_impl", ) # np.load on .npz is lazy; materialize all fields while filter is active.
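# Added note: the returned NpzFile decodes (and, with allow_pickle=True,
# unpickles) each array only on first access, so copying every field into a
# plain dict below keeps all of that lazy work inside the active warnings
# filter; accessing the fields later, outside the filter, would surface the
# warning again.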
with np.load(skin_data_path, allow_pickle=True) as skin_npz: skin_data = {key: skin_npz[key] for key in skin_npz.files} joint2num = skin_data["joint2num"] if isinstance(joint2num, np.ndarray): joint2num = joint2num.item() self.full_joint_count = int(skin_data["weights"].shape[1]) kintree_table = np.array(skin_data["kintree_table"], dtype=np.int64) parents = kintree_table[0].copy() parents[parents > 1_000_000_000] = -1 self.full_joint_parents = torch.tensor(parents, device=device, dtype=torch.long) root_candidates = np.where(parents == -1)[0] self.full_root_idx = int(root_candidates[0]) if root_candidates.size else 0 self.joint_regressor = torch.tensor( np.array(skin_data["J_regressor"], dtype=np.float32), device=device, dtype=torch.float, ) rig_joint_names = [] rig_joint_indices = [] for joint_name in self.skeleton.bone_order_names: mapped_name = SMPLX_BODY_JOINT_NAME_MAP.get(joint_name) if mapped_name is None or mapped_name not in joint2num: raise ValueError(f"Missing SMPL-X joint mapping for '{joint_name}'") rig_joint_names.append(mapped_name) rig_joint_indices.append(int(joint2num[mapped_name])) self.body_joint_indices = np.array(rig_joint_indices, dtype=np.int64) # Prepare mean hand pose rotations for joints not produced by the model. if use_mean_hands and mean_hands_path is not None and os.path.exists(mean_hands_path): mean_hands = np.array(np.load(mean_hands_path), dtype=np.float32) else: mean_hands = np.zeros(90, dtype=np.float32) if mean_hands.shape[0] != 90: raise ValueError(f"Expected mean_hands shape (90,), got {mean_hands.shape}") mean_hands = mean_hands.reshape(30, 3) mean_hands_rotmats = axis_angle_to_matrix(torch.tensor(mean_hands, device=device, dtype=torch.float)) left_hand_joint_names = [f"L_{name}" for name in SMPLX_HAND_JOINT_ORDER] right_hand_joint_names = [f"R_{name}" for name in SMPLX_HAND_JOINT_ORDER] left_indices = [joint2num[name] for name in left_hand_joint_names] right_indices = [joint2num[name] for name in right_hand_joint_names] self.hand_joint_indices = np.array(left_indices + right_indices, dtype=np.int64) self.mean_hand_rotmats = mean_hands_rotmats face_indices = [joint2num[name] for name in SMPLX_FACE_JOINT_NAMES if name in joint2num] self.face_joint_indices = np.array(face_indices, dtype=np.int64) self.mean_face_rotmats = torch.eye(3, device=device).repeat(len(self.face_joint_indices), 1, 1) # bind_rig_transform: [J, 4, 4] # bind_vertices: [V, 3] # faces: [F, 3] # lbs indices, lbs weights: [V, W] (W = number of joints) v_template = np.array(skin_data["v_template"], dtype=np.float32) faces = np.array(skin_data["f"], dtype=np.int64) weights = np.array(skin_data["weights"], dtype=np.float32) shapedirs = np.array(skin_data["shapedirs"], dtype=np.float32) posedirs = np.array(skin_data["posedirs"], dtype=np.float32) if beta_path is not None and os.path.exists(beta_path): betas = np.array(np.load(beta_path), dtype=np.float32) else: betas = np.zeros(300, dtype=np.float32) num_shape_coeffs = shapedirs.shape[2] # 400 = 300 + 100 (shape + expression) if betas.shape[0] < num_shape_coeffs: betas = np.pad(betas, (0, num_shape_coeffs - betas.shape[0]), mode="constant") elif betas.shape[0] > num_shape_coeffs: betas = betas[:num_shape_coeffs] v_shaped = v_template + np.tensordot(shapedirs, betas, axes=[2, 0]) self.v_shaped = torch.tensor(v_shaped, device=device, dtype=torch.float) self.posedirs = torch.tensor(posedirs, device=device, dtype=torch.float) self.joint_rest = torch.einsum("jv,vc->jc", self.joint_regressor, self.v_shaped) # Align SMPL-X body rest joints to the 
model skeleton rest pose. body_rest = self.skeleton.neutral_joints.to(device=device, dtype=torch.float) if body_rest.shape[0] == self.body_joint_indices.shape[0]: # Treat mismatches as a warning and align to the skeleton pose anyway. max_delta = (self.joint_rest[self.body_joint_indices] - body_rest).abs().max() if max_delta > 1e-6: print( "Warning: SMPL-X rest pose mismatch (max_delta=" f"{max_delta:.2e}); aligning to skeleton neutral joints." ) self.joint_rest[self.body_joint_indices] = body_rest # Renormalize weights to avoid numerical issues. weight_sums = weights.sum(axis=1, keepdims=True) zero_mask = weight_sums[:, 0] < 1e-8 weights = weights / np.clip(weight_sums, 1e-8, None) if np.any(zero_mask): weights[zero_mask, :] = 0.0 weights[zero_mask, self.full_root_idx] = 1.0 joint_indices = np.arange(self.full_joint_count, dtype=np.int64) lbs_indices = np.tile(joint_indices[None, :], (v_template.shape[0], 1)) bind_rig_np = np.zeros((self.full_joint_count, 4, 4), dtype=np.float32) bind_rig_np[:, 3, 3] = 1.0 bind_rig_np[:, :3, :3] = np.eye(3, dtype=np.float32) bind_rig_np[:, :3, 3] = self.joint_rest.detach().cpu().numpy() self.bind_rig_transform = torch.from_numpy(bind_rig_np).to(device=device, dtype=torch.float) bind_rig_inv_np = np.linalg.inv(bind_rig_np) self.bind_rig_transform_inv = torch.from_numpy(bind_rig_inv_np).to(device=device, dtype=torch.float) self.bind_vertices = torch.tensor(v_shaped, device=device, dtype=torch.float) self.faces = torch.tensor(faces, device=device, dtype=torch.long) self.lbs_indices = torch.tensor(lbs_indices, device=device, dtype=torch.long) self.lbs_weights = torch.tensor(weights, device=device, dtype=torch.float) # double check the rig matches expected skeleton order for sname, rname in zip(self.skeleton.bone_order_names, rig_joint_names): mapped_name = SMPLX_BODY_JOINT_NAME_MAP.get(sname) if mapped_name != rname: raise ValueError(f"MISMATCH in skinning rig: expected='{mapped_name}' vs rig='{rname}'") def lbs(self, posed_transform, bind_vertices=None): bind_rig_transform_inv = self.bind_rig_transform_inv if bind_vertices is None: bind_vertices = self.bind_vertices lbs_weights = self.lbs_weights # posed_transform: [B, F, J, 4, 4] or [B, J, 4, 4] or [J, 4, 4] # unsqueeze to match posed_transform batch dims batch_dims = posed_transform.shape[:-3] if bind_vertices.dim() == 2: for _ in batch_dims: bind_vertices = bind_vertices.unsqueeze(0) elif bind_vertices.dim() == 3: if len(batch_dims) == 1: if bind_vertices.shape[0] != batch_dims[0]: bind_vertices = bind_vertices.unsqueeze(0) elif len(batch_dims) > 1: for _ in range(len(batch_dims) - 1): bind_vertices = bind_vertices.unsqueeze(0) for _ in batch_dims: bind_rig_transform_inv = bind_rig_transform_inv.unsqueeze(0) lbs_weights = lbs_weights.unsqueeze(0) # bind_rig_transform_inv: [..., J, 4, 4] # bind_vertices: [..., V, 3] # lbs_weights: [..., V, W] affine_mat = (posed_transform @ bind_rig_transform_inv)[..., :3, :] # [..., J, 3, 4] vs = ( affine_mat[..., self.lbs_indices, :, :] @ torch.concat([bind_vertices, torch.ones_like(bind_vertices[..., 0:1])], dim=-1)[..., None, :, None] ) # [..., V, W, 3, 1] ws = lbs_weights[..., None, None] resv = (vs * ws).sum(dim=-3).squeeze(-1) # [..., V, 3] return resv def skin(self, joint_rotmat, joint_pos, rot_is_global=False): """ joint_rotmat: [T, J, 3, 3] local or global joint rotation matrices joint_pos: [T, J, 3] global joint positions rot_is_global: bool, if True, joint_rotmat is global rotation matrices, otherwise it is local rotation matrices and FK is performed internally 
""" nF, nJ = joint_pos.shape[:2] device = joint_rotmat.device # import ipdb; ipdb.set_trace() if rot_is_global: if joint_rotmat.shape[1] == self.full_joint_count: local_rotmat_full = joint_rotmat.clone() parents = self.full_joint_parents.to(device) parent_rot_mats = local_rotmat_full[:, parents] parent_rot_mats[:, self.full_root_idx] = torch.eye(3, device=device) parent_rot_mats_inv = parent_rot_mats.transpose(2, 3) local_rotmat_full = torch.einsum( "T N m n, T N n o -> T N m o", parent_rot_mats_inv, local_rotmat_full, ) else: local_rotmat = self.skeleton.global_rots_to_local_rots(joint_rotmat) else: local_rotmat = joint_rotmat if rot_is_global and joint_rotmat.shape[1] == self.full_joint_count: full_local = local_rotmat_full else: full_local = torch.eye(3, device=device).reshape(1, 1, 3, 3).repeat(nF, self.full_joint_count, 1, 1) full_local[:, self.body_joint_indices] = local_rotmat if self.mean_hand_rotmats is not None: full_local[:, self.hand_joint_indices] = self.mean_hand_rotmats[None] if self.mean_face_rotmats is not None: full_local[:, self.face_joint_indices] = self.mean_face_rotmats[None] pose_feature = (full_local[:, 1:] - torch.eye(3, device=device)[None, None]).reshape(nF, -1) pose_offsets = torch.einsum("vcp,tp->tvc", self.posedirs, pose_feature) v_posed = self.v_shaped[None] + pose_offsets joints_rest = self.joint_rest[None].repeat(nF, 1, 1) posed_joints, global_joint_rots = batch_rigid_transform( full_local, joints_rest, self.full_joint_parents.to(device), self.full_root_idx, ) # remove the skeleton offset of the root joint root_trans = joint_pos[:, self.skeleton.root_idx] - self.skeleton.neutral_joints[0:1] posed_joints = posed_joints + root_trans[:, None, :] fk_transform = torch.eye(4, device=device)[None, None].repeat(nF, self.full_joint_count, 1, 1) fk_transform[..., :3, :3] = global_joint_rots fk_transform[..., :3, 3] = posed_joints vertices = self.lbs(fk_transform, bind_vertices=v_posed) return vertices ================================================ FILE: kimodo/viz/soma_layer_skin.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 """SOMA layer-based skinning for visualization (SOMASkeleton30 / SOMASkeleton77).""" from pathlib import Path import numpy as np import torch from huggingface_hub import snapshot_download from soma import SomaLayer as SOMALayer from kimodo.assets import SOMA_ASSETS_ROOT from kimodo.skeleton import SOMASkeleton30, SOMASkeleton77, global_rots_to_local_rots SOMA_MHR_NEUTRAL_PATH = "somaskel30/soma_base_fit_mhr_params.npz" class SOMASkin: def __init__( self, skeleton, ): self.skeleton = skeleton assert isinstance( skeleton, (SOMASkeleton30, SOMASkeleton77) ), "SOMASkin currently only supports SOMASkeleton30 or SOMASkeleton77" assert skeleton.neutral_joints is not None, "The skeleton must have neutral joints instantiated" # Run the SOMA layer on CPU regardless of the skeleton's device. device = "cpu" self.device = device self._soma_model = SOMALayer( identity_model_type="mhr", device=device, ) self.faces = self._soma_model.faces neutral_mhr_path = Path(skeleton.folder).parent / SOMA_MHR_NEUTRAL_PATH neutral_mhr = np.load(neutral_mhr_path) # one time call to prepare the identity self.soma_identity = torch.from_numpy(neutral_mhr["identity_params"]) self.scale_params = torch.from_numpy(neutral_mhr["scale_params"]) self._soma_model.prepare_identity(self.soma_identity.to(device), scale_params=self.scale_params.to(device)) # dummy output to get bind_vertices transl = torch.zeros(1, 3, device=device) self._full_skeleton = SOMASkeleton77() self.skel_slice = self.skeleton.get_skel_slice(self._full_skeleton) self.bind_vertices = self.soma_model_pose( self._full_skeleton.relaxed_hands_rest_pose[None], transl=transl, pose2rot=False, )["vertices"][0] def soma_model_pose(self, *args, **kwargs): with torch.inference_mode(): return self._soma_model.pose(*args, **kwargs) def skin(self, joint_rotmat, joint_pos, rot_is_global=False): """ joint_rotmat: [T, J, 3, 3] local or global joint rotation matrices joint_pos: [T, J, 3] global joint positions rot_is_global: bool, if True, joint_rotmat is global rotation matrices, otherwise it is local rotation matrices and FK is performed internally """ nF, nJ = joint_pos.shape[:2] if rot_is_global: local_joint_rots_mats_subset = global_rots_to_local_rots(joint_rotmat, self.skeleton) else: local_joint_rots_mats_subset = joint_rotmat if nJ != self._full_skeleton.nbjoints: local_joint_rots_mats = self.skeleton.to_SOMASkeleton77(local_joint_rots_mats_subset) else: local_joint_rots_mats = local_joint_rots_mats_subset # remove the skeleton offset of the root joint transl = joint_pos[:, self.skeleton.root_idx] - self.skeleton.neutral_joints[0:1] output = self.soma_model_pose( local_joint_rots_mats.to(device=self.device, dtype=torch.float32), transl=transl.to(device=self.device, dtype=torch.float32), pose2rot=False, ) return output["vertices"] ================================================ FILE: kimodo/viz/soma_skin.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 """SOMA skeleton skinning for visualization (SOMASkeleton30 / SOMASkeleton77).""" from pathlib import Path import numpy as np import torch from kimodo.skeleton import ( SOMASkeleton30, SOMASkeleton77, batch_rigid_transform, global_rots_to_local_rots, ) # Skin for SOMASkeleton77 SKEL_PATH = "somaskel77" SKIN_NAME = "skin_standard.npz" class SOMASkin: def __init__(self, skeleton): skel_path = Path(skeleton.folder).parent / SKEL_PATH skin_data_path = skel_path / SKIN_NAME self.skeleton_input = skeleton assert isinstance( skeleton, (SOMASkeleton30, SOMASkeleton77) ), "SOMASkin currently only supports SOMASkeleton30 or SOMASkeleton77" assert skeleton.neutral_joints is not None, "The skeleton must have neutral joints instantiated" device = skeleton.neutral_joints.device # the skin is always the 77-joint skeleton # if user is using the 30-joint skeleton, we will pad it when skinning is called self.skeleton_skin = SOMASkeleton77(skel_path).to(device) # bind_rig_transform: [R, 4, 4] # bind_vertices: [V, 3] # faces: [F, 3] # lbs indices, lbs weights: [V, W] (W = max number of joints a vertex is skinned to; in our case W=5) skin_data = np.load(skin_data_path) bind_rig_np = np.array(skin_data["bind_rig_transform"], dtype=np.float32) self.bind_rig_transform = torch.from_numpy(bind_rig_np).to(device=device, dtype=torch.float) # Precompute the inverse in numpy to avoid torch lazy evaluation issues bind_rig_inv_np = np.linalg.inv(bind_rig_np) self.bind_rig_transform_inv = torch.from_numpy(bind_rig_inv_np).to(device=device, dtype=torch.float) self.bind_vertices = torch.tensor(skin_data["bind_vertices"], device=device, dtype=torch.float) self.faces = torch.tensor(skin_data["faces"], device=device, dtype=torch.long) self.lbs_indices = torch.tensor(skin_data["lbs_indices"], device=device, dtype=torch.long) self.lbs_weights = torch.tensor(skin_data["lbs_weights"], device=device, dtype=torch.float) # double check the rig matches expected skeleton rig_joint_names = list(skin_data["rig_joint_names"]) # list(str) : [R] for sname, rname in zip(self.skeleton_skin.bone_order_names, rig_joint_names): if sname != rname: raise ValueError(f"MISMATCH in skinning rig: expected='{sname}' vs rig='{rname}'") def lbs(self, posed_transform): bind_rig_transform_inv = self.bind_rig_transform_inv bind_vertices = self.bind_vertices lbs_weights = self.lbs_weights # posed_transform: [B, F, J, 4, 4] or [B, J, 4, 4] or [J, 4, 4] # unsqueeze to match posed_transform dim for _ in range(posed_transform.dim() - 3): bind_rig_transform_inv = bind_rig_transform_inv.unsqueeze(0) bind_vertices = bind_vertices.unsqueeze(0) lbs_weights = lbs_weights.unsqueeze(0) # bind_rig_transform_inv: [..., R, 4, 4] # bind_vertices: [..., V, 3] # lbs_weights: [..., V, W] affine_mat = (posed_transform @ bind_rig_transform_inv)[..., :3, :] # [..., J, 3, 4] vs = ( affine_mat[..., self.lbs_indices, :, :] @ torch.concat([bind_vertices, torch.ones_like(bind_vertices[..., 0:1])], dim=-1)[..., None, :, None] ) # [..., V, W, 3, 1] ws = lbs_weights[..., None, None] resv = (vs * ws).sum(dim=-3).squeeze(-1) # [..., V, 3] return resv def skin(self, joint_rotmat, joint_pos, rot_is_global=False): """ joint_rotmat: [T, J, 3, 3] local or global joint rotation matrices joint_pos: [T, J, 3] global joint positions rot_is_global: bool, if True, joint_rotmat is global rotation matrices, otherwise it is local rotation matrices and FK is performed internally """ nF, nJ = joint_pos.shape[:2] device = joint_rotmat.device if nJ !=
self.skeleton_skin.nbjoints: assert nJ == 30, "SOMASkin currently only supports 30-joint or 77-joint skeletons" # make sure we have local joint rotations if rot_is_global: local_joint_rots_mats_subset = global_rots_to_local_rots(joint_rotmat, self.skeleton_input) else: local_joint_rots_mats_subset = joint_rotmat local_joint_rots_mats = self.skeleton_input.to_SOMASkeleton77(local_joint_rots_mats_subset) # FK to get the global joint pos and rot neutral_joints_seq = self.skeleton_skin.neutral_joints[None].repeat((nF, 1, 1)).to(device) new_joint_pos, joint_rotmat = batch_rigid_transform( local_joint_rots_mats, neutral_joints_seq, self.skeleton_skin.joint_parents.to(device), self.skeleton_skin.root_idx, ) joint_pos = new_joint_pos + joint_pos[:, self.skeleton_input.root_idx : self.skeleton_input.root_idx + 1] nJ = self.skeleton_skin.nbjoints rot_is_global = True # prepare full transformation matrices fk_transform = torch.eye(4, device=device)[None, None].repeat(nF, nJ, 1, 1) fk_transform[..., :3, 3] = joint_pos if rot_is_global: fk_transform[..., :3, :3] = joint_rotmat else: neutral_joints_seq = self.skeleton_skin.neutral_joints[None].repeat((nF, 1, 1)).to(device) # FK to get the global rotations _, global_joint_rotmat = batch_rigid_transform( joint_rotmat, neutral_joints_seq, self.skeleton_skin.joint_parents.to(device), self.skeleton_skin.root_idx, ) fk_transform[..., :3, :3] = global_joint_rotmat vertices = self.lbs(fk_transform) return vertices ================================================ FILE: kimodo/viz/viser_utils.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Viser-based 3D viz: re-exports from viz submodules for backward compatibility.""" import os from .constraint_ui import ( ConstraintSet, EEJointsKeyframeSet, FullbodyKeyframeSet, RootKeyframe2DSet, build_constraint_set_table_markdown, update_interval, ) from .gui import GuiElements from .playback import CharacterMotion from .scene import ( DARK_THEME, LIGHT_THEME, SKIN_CACHE, Character, SkeletonMesh, WaypointMesh, ) def load_example_cases(examples_base_dir): """List subdirectories of examples_base_dir as a name -> path dict.""" example_dirs = os.listdir(examples_base_dir) example_names = sorted([d for d in example_dirs if os.path.isdir(os.path.join(examples_base_dir, d))]) return {name: os.path.join(examples_base_dir, name) for name in example_names} __all__ = [ "Character", "CharacterMotion", "ConstraintSet", "DARK_THEME", "EEJointsKeyframeSet", "FullbodyKeyframeSet", "GuiElements", "LIGHT_THEME", "RootKeyframe2DSet", "SKIN_CACHE", "SkeletonMesh", "WaypointMesh", "build_constraint_set_table_markdown", "load_example_cases", "update_interval", ] ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" [project] name = "kimodo" version = "1.0.0" description = "Kimodo motion generation model" readme = "README.md" requires-python = ">=3.8" license = {text = "Apache-2.0"} dependencies = [ "hydra-core>=1.3", "omegaconf>=2.3", "numpy>=1.23", "scipy>=1.10", "transformers==5.1.0", "urllib3>=2.6.3", "boto3", "peft>=0.18", "einops>=0.7", "tqdm>=4.0", "packaging>=21.0", "pydantic>=2.0", "filelock>=3.20.3", "gradio>=6.8.0", "gradio_client>=1.0", "trimesh>=3.21.7", "scenepic>=1.1.0", "pillow>=9.0", "av>=16.1.0", "bvhio", ] [project.optional-dependencies] 
demo = [ "viser @ git+https://github.com/nv-tlabs/kimodo-viser.git", ] soma = [ "py-soma-x @ git+https://github.com/NVlabs/SOMA-X.git" ] all = [ "viser @ git+https://github.com/nv-tlabs/kimodo-viser.git", "py-soma-x @ git+https://github.com/NVlabs/SOMA-X.git" ] [project.scripts] kimodo_gen = "kimodo.scripts.generate:main" kimodo_demo = "kimodo.demo:main" kimodo_textencoder = "kimodo.scripts.run_text_encoder_server:main" kimodo_convert = "kimodo.scripts.motion_convert:main" [tool.setuptools] include-package-data = true zip-safe = false [tool.setuptools.package-data] kimodo = ["assets/**/*"] [tool.flake8] max-line-length = 120 [tool.ruff] extend-select = ["I001"] # Enable import sorting line-length = 120 [tool.ruff.lint.isort] known-first-party = ["kimodo"] known-third-party = ["torch", "numpy", "pytorch_lightning", "wandb", "tqdm"] force-sort-within-sections = false ================================================ FILE: setup.py ================================================ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import os import shutil import subprocess import sys from pathlib import Path from setuptools import Extension, find_packages, setup from setuptools.command.build_ext import build_ext class CMakeExtension(Extension): def __init__(self, name, sourcedir=""): super().__init__(name, sources=[]) self.sourcedir = os.path.abspath(sourcedir) class CMakeBuild(build_ext): def run(self): try: subprocess.check_output(["cmake", "--version"]) except OSError as exc: raise RuntimeError("CMake must be installed to build this package") from exc for ext in self.extensions: self.build_extension(ext) def build_extension(self, ext): extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) cmake_args = [ f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}", f"-DPYTHON_EXECUTABLE={sys.executable}", ] cfg = "Debug" if self.debug else "Release" build_args = ["--config", cfg] cmake_args.append(f"-DCMAKE_BUILD_TYPE={cfg}") use_mingw = False mingw_bin = None if sys.platform == "win32": generator = os.environ.get("CMAKE_GENERATOR", "") if generator: cmake_args = ["-G", generator] + cmake_args if "mingw" in generator.lower(): use_mingw = True else: cmake_args.append(f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}") else: try: subprocess.check_output(["g++", "--version"], stderr=subprocess.STDOUT) use_mingw = True cmake_args = ["-G", "MinGW Makefiles"] + cmake_args build_args = [] except (OSError, subprocess.CalledProcessError): cmake_args.append(f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}") if use_mingw: gxx_path = shutil.which("g++") if gxx_path: mingw_bin = Path(gxx_path).parent else: build_args += ["--", "-j4"] env = os.environ.copy() env["CXXFLAGS"] = f'{env.get("CXXFLAGS", "")} -DVERSION_INFO=\\"{self.distribution.get_version()}\\"' if not os.path.exists(self.build_temp): os.makedirs(self.build_temp) subprocess.check_call(["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=self.build_temp) if use_mingw and mingw_bin is not None: runtime_libs = [ "libstdc++-6.dll", "libgcc_s_seh-1.dll", "libwinpthread-1.dll", ] extdir_path = Path(extdir) extdir_path.mkdir(parents=True, exist_ok=True) for lib_name in runtime_libs: src_path = mingw_bin / lib_name if src_path.exists(): shutil.copy2(src_path, extdir_path / lib_name) else: self.announce( f"Warning: Expected MinGW runtime DLL '{lib_name}' not found 
next to g++ (looked in {mingw_bin}). " "The built extension may fail to import if the DLL is not on PATH.", level=3, ) kimodo_packages = find_packages(include=["kimodo", "kimodo.*"]) # When set (e.g. in Docker), do not bundle motion_correction here; it is installed # separately (e.g. from docker_requirements.txt as ./MotionCorrection) non-editable. skip_motion_correction = os.environ.get("SKIP_MOTION_CORRECTION_IN_SETUP", "").strip().lower() in ("1", "true", "yes") if skip_motion_correction: packages = kimodo_packages package_dir = {} ext_modules = [] cmdclass = {} else: packages = kimodo_packages + ["motion_correction"] package_dir = {"motion_correction": "MotionCorrection/python/motion_correction"} ext_modules = [CMakeExtension("motion_correction._motion_correction", "MotionCorrection")] cmdclass = {"build_ext": CMakeBuild} setup( packages=packages, package_dir=package_dir, ext_modules=ext_modules, cmdclass=cmdclass, )
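# Added usage note (illustrative invocations): the toggle above accepts "1",
# "true", or "yes" (case-insensitive), e.g.
#   SKIP_MOTION_CORRECTION_IN_SETUP=1 pip install .   # kimodo only; skip the CMake extension
#   pip install .                                     # also builds and bundles motion_correction
# The first form matches the Docker flow, where MotionCorrection is installed
# separately from docker_requirements.txt.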