Repository: inspirai/TimeChamber Branch: main Commit: af3f3571c99a Files: 201 Total size: 120.5 MB Directory structure: gitextract_rvpupy7y/ ├── .gitattributes ├── .gitignore ├── LICENSE ├── LISENCE/ │ └── isaacgymenvs/ │ └── LICENSE ├── README.md ├── assets/ │ └── mjcf/ │ └── nv_ant.xml ├── docs/ │ └── environments.md ├── setup.py └── timechamber/ ├── __init__.py ├── ase/ │ ├── ase_agent.py │ ├── ase_models.py │ ├── ase_network_builder.py │ ├── ase_players.py │ ├── hrl_agent.py │ ├── hrl_models.py │ ├── hrl_network_builder.py │ ├── hrl_players.py │ └── utils/ │ ├── amp_agent.py │ ├── amp_datasets.py │ ├── amp_models.py │ ├── amp_network_builder.py │ ├── amp_players.py │ ├── common_agent.py │ ├── common_player.py │ └── replay_buffer.py ├── cfg/ │ ├── config.yaml │ ├── task/ │ │ ├── MA_Ant_Battle.yaml │ │ ├── MA_Ant_Sumo.yaml │ │ └── MA_Humanoid_Strike.yaml │ └── train/ │ ├── MA_Ant_BattlePPO.yaml │ ├── MA_Ant_SumoPPO.yaml │ ├── MA_Humanoid_StrikeHRL.yaml │ └── base/ │ └── ase_humanoid_hrl.yaml ├── learning/ │ ├── common_agent.py │ ├── common_player.py │ ├── hrl_sp_agent.py │ ├── hrl_sp_player.py │ ├── pfsp_player_pool.py │ ├── ppo_sp_agent.py │ ├── ppo_sp_player.py │ ├── replay_buffer.py │ ├── vectorized_models.py │ └── vectorized_network_builder.py ├── models/ │ ├── Humanoid_Strike/ │ │ ├── policy.pth │ │ └── policy_op.pth │ ├── ant_battle_2agents/ │ │ └── policy.pth │ ├── ant_battle_3agents/ │ │ └── policy.pth │ └── ant_sumo/ │ └── policy.pth ├── tasks/ │ ├── __init__.py │ ├── ase_humanoid_base/ │ │ ├── base_task.py │ │ ├── humanoid.py │ │ ├── humanoid_amp.py │ │ ├── humanoid_amp_task.py │ │ └── poselib/ │ │ ├── README.md │ │ ├── data/ │ │ │ ├── 01_01_cmu.fbx │ │ │ ├── 07_01_cmu.fbx │ │ │ ├── 08_02_cmu.fbx │ │ │ ├── 09_11_cmu.fbx │ │ │ ├── 49_08_cmu.fbx │ │ │ ├── 55_01_cmu.fbx │ │ │ ├── amp_humanoid_tpose.npy │ │ │ ├── cmu_tpose.npy │ │ │ ├── configs/ │ │ │ │ ├── retarget_cmu_to_amp.json │ │ │ │ └── retarget_sfu_to_amp.json │ │ │ └── sfu_tpose.npy │ │ ├── 
fbx_importer.py │ │ ├── generate_amp_humanoid_tpose.py │ │ ├── mjcf_importer.py │ │ ├── poselib/ │ │ │ ├── __init__.py │ │ │ ├── core/ │ │ │ │ ├── __init__.py │ │ │ │ ├── backend/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── abstract.py │ │ │ │ │ └── logger.py │ │ │ │ ├── rotation3d.py │ │ │ │ ├── tensor_utils.py │ │ │ │ └── tests/ │ │ │ │ ├── __init__.py │ │ │ │ └── test_rotation.py │ │ │ ├── skeleton/ │ │ │ │ ├── __init__.py │ │ │ │ ├── backend/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── fbx/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── fbx_backend.py │ │ │ │ │ └── fbx_read_wrapper.py │ │ │ │ └── skeleton3d.py │ │ │ └── visualization/ │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── core.py │ │ │ ├── plt_plotter.py │ │ │ ├── simple_plotter_tasks.py │ │ │ ├── skeleton_plotter_tasks.py │ │ │ └── tests/ │ │ │ ├── __init__.py │ │ │ └── test_plotter.py │ │ └── retarget_motion.py │ ├── base/ │ │ ├── __init__.py │ │ ├── ma_vec_task.py │ │ └── vec_task.py │ ├── data/ │ │ ├── assets/ │ │ │ └── mjcf/ │ │ │ └── amp_humanoid_sword_shield.xml │ │ ├── models/ │ │ │ └── llc_reallusion_sword_shield.pth │ │ └── motions/ │ │ └── reallusion_sword_shield/ │ │ ├── README.txt │ │ ├── RL_Avatar_Atk_2xCombo01_Motion.npy │ │ ├── RL_Avatar_Atk_2xCombo02_Motion.npy │ │ ├── RL_Avatar_Atk_2xCombo03_Motion.npy │ │ ├── RL_Avatar_Atk_2xCombo04_Motion.npy │ │ ├── RL_Avatar_Atk_2xCombo05_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo01_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo02_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo03_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo04_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo05_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo06_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo07_Motion.npy │ │ ├── RL_Avatar_Atk_4xCombo01_Motion.npy │ │ ├── RL_Avatar_Atk_4xCombo02_Motion.npy │ │ ├── RL_Avatar_Atk_4xCombo03_Motion.npy │ │ ├── RL_Avatar_Atk_Jump_Motion.npy │ │ ├── RL_Avatar_Atk_Kick_Motion.npy │ │ ├── RL_Avatar_Atk_ShieldCharge_Motion.npy │ │ ├── RL_Avatar_Atk_ShieldSwipe01_Motion.npy │ │ ├── 
RL_Avatar_Atk_ShieldSwipe02_Motion.npy │ │ ├── RL_Avatar_Atk_SlashDown_Motion.npy │ │ ├── RL_Avatar_Atk_SlashLeft_Motion.npy │ │ ├── RL_Avatar_Atk_SlashRight_Motion.npy │ │ ├── RL_Avatar_Atk_SlashUp_Motion.npy │ │ ├── RL_Avatar_Atk_Spin_Motion.npy │ │ ├── RL_Avatar_Atk_Stab_Motion.npy │ │ ├── RL_Avatar_Counter_Atk01_Motion.npy │ │ ├── RL_Avatar_Counter_Atk02_Motion.npy │ │ ├── RL_Avatar_Counter_Atk03_Motion.npy │ │ ├── RL_Avatar_Counter_Atk04_Motion.npy │ │ ├── RL_Avatar_Counter_Atk05_Motion.npy │ │ ├── RL_Avatar_Dodge_Backward_Motion.npy │ │ ├── RL_Avatar_Dodgle_Left_Motion.npy │ │ ├── RL_Avatar_Dodgle_Right_Motion.npy │ │ ├── RL_Avatar_Fall_Backward_Motion.npy │ │ ├── RL_Avatar_Fall_Left_Motion.npy │ │ ├── RL_Avatar_Fall_Right_Motion.npy │ │ ├── RL_Avatar_Fall_SpinLeft_Motion.npy │ │ ├── RL_Avatar_Fall_SpinRight_Motion.npy │ │ ├── RL_Avatar_Idle_Alert(0)_Motion.npy │ │ ├── RL_Avatar_Idle_Alert_Motion.npy │ │ ├── RL_Avatar_Idle_Battle(0)_Motion.npy │ │ ├── RL_Avatar_Idle_Battle_Motion.npy │ │ ├── RL_Avatar_Idle_Ready(0)_Motion.npy │ │ ├── RL_Avatar_Idle_Ready_Motion.npy │ │ ├── RL_Avatar_Kill_2xCombo01_Motion.npy │ │ ├── RL_Avatar_Kill_2xCombo02_Motion.npy │ │ ├── RL_Avatar_Kill_3xCombo01_Motion.npy │ │ ├── RL_Avatar_Kill_3xCombo02_Motion.npy │ │ ├── RL_Avatar_Kill_4xCombo01_Motion.npy │ │ ├── RL_Avatar_RunBackward_Motion.npy │ │ ├── RL_Avatar_RunForward_Motion.npy │ │ ├── RL_Avatar_RunLeft_Motion.npy │ │ ├── RL_Avatar_RunRight_Motion.npy │ │ ├── RL_Avatar_Shield_BlockBackward_Motion.npy │ │ ├── RL_Avatar_Shield_BlockCrouch_Motion.npy │ │ ├── RL_Avatar_Shield_BlockDown_Motion.npy │ │ ├── RL_Avatar_Shield_BlockLeft_Motion.npy │ │ ├── RL_Avatar_Shield_BlockRight_Motion.npy │ │ ├── RL_Avatar_Shield_BlockUp_Motion.npy │ │ ├── RL_Avatar_Standoff_Circle_Motion.npy │ │ ├── RL_Avatar_Standoff_Feint_Motion.npy │ │ ├── RL_Avatar_Standoff_Swing_Motion.npy │ │ ├── RL_Avatar_Sword_ParryBackward01_Motion.npy │ │ ├── RL_Avatar_Sword_ParryBackward02_Motion.npy │ │ ├── 
RL_Avatar_Sword_ParryBackward03_Motion.npy │ │ ├── RL_Avatar_Sword_ParryBackward04_Motion.npy │ │ ├── RL_Avatar_Sword_ParryCrouch_Motion.npy │ │ ├── RL_Avatar_Sword_ParryDown_Motion.npy │ │ ├── RL_Avatar_Sword_ParryLeft_Motion.npy │ │ ├── RL_Avatar_Sword_ParryRight_Motion.npy │ │ ├── RL_Avatar_Sword_ParryUp_Motion.npy │ │ ├── RL_Avatar_Taunt_PoundChest_Motion.npy │ │ ├── RL_Avatar_Taunt_Roar_Motion.npy │ │ ├── RL_Avatar_Taunt_ShieldKnock_Motion.npy │ │ ├── RL_Avatar_TurnLeft180_Motion.npy │ │ ├── RL_Avatar_TurnLeft90_Motion.npy │ │ ├── RL_Avatar_TurnRight180_Motion.npy │ │ ├── RL_Avatar_TurnRight90_Motion.npy │ │ ├── RL_Avatar_WalkBackward01_Motion.npy │ │ ├── RL_Avatar_WalkBackward02_Motion.npy │ │ ├── RL_Avatar_WalkForward01_Motion.npy │ │ ├── RL_Avatar_WalkForward02_Motion.npy │ │ ├── RL_Avatar_WalkLeft01_Motion.npy │ │ ├── RL_Avatar_WalkLeft02_Motion.npy │ │ ├── RL_Avatar_WalkRight01_Motion.npy │ │ ├── RL_Avatar_WalkRight02_Motion.npy │ │ └── dataset_reallusion_sword_shield.yaml │ ├── ma_ant_battle.py │ ├── ma_ant_sumo.py │ └── ma_humanoid_strike.py ├── train.py └── utils/ ├── config.py ├── gym_util.py ├── logger.py ├── motion_lib.py ├── reformat.py ├── rlgames_utils.py ├── torch_jit_utils.py ├── torch_utils.py ├── utils.py ├── vec_task.py └── vec_task_wrappers.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ ================================================ FILE: .gitignore ================================================ videos /timechamber/logs *train_dir* *ige_logs* *.egg-info /.vs /.vscode /_package /shaders ._tmptext.txt __pycache__/ /timechamber/tasks/__pycache__ /timechamber/utils/__pycache__ /timechamber/tasks/base/__pycache__ /tools/format/.lastrun *.pyc _doxygen /rlisaacgymenvsgpu/logs /timechamber/benchmarks/results /timechamber/simpletests/results *.pxd2 /tests/logs 
/timechamber/balance_bot.xml /timechamber/quadcopter.xml /timechamber/ingenuity.xml logs* nn/ runs/ .idea outputs/ *.hydra* /timechamber/wandb /test .gitlab ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2022 MIT Inspir.ai Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: LISENCE/isaacgymenvs/LICENSE ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. 
Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ # TimeChamber: A Massively Parallel Large Scale Self-Play Framework **** **TimeChamber** is a large scale self-play framework running on parallel simulation. Running self-play algorithms always need lots of hardware resources, especially on 3D physically simulated environments. We provide a self-play framework that can achieve fast training and evaluation with **ONLY ONE GPU**. TimeChamber is developed with the following key features: - **Parallel Simulation**: TimeChamber is built within [Isaac Gym](https://developer.nvidia.com/isaac-gym). Isaac Gym is a fast GPU-based simulation platform. 
It supports running thousands of environments in parallel on a single GPU.For example, on one NVIDIA Laptop RTX 3070Ti GPU, TimeChamber can reach **80,000+ mean FPS** by running 4,096 environments in parallel. - **Parallel Evaluation**: TimeChamber can fast calculate dozens of policies' ELO rating(represent their combat power). It also supports multi-player ELO calculations by [multi-elo](https://github.com/djcunningham0/multielo). Inspired by Vectorization techniques for [fast population-based training](https://github.com/instadeepai/fastpbrl), we leverage the vectorized models to evaluate different policy in parallel. - **Prioritized Fictitious Self-Play Benchmark**: We implement a classic PPO self-play algorithm on top of [rl_games](https://github.com/Denys88/rl_games), with a prioritized player pool to avoid cycles and improve the diversity of training policy.
- **Competitive Multi-Agent Tasks**: Inspired by [OpenAI RoboSumo](https://github.com/openai/robosumo) and [ASE](https://github.com/nv-tlabs/ASE), we introduce three competitive multi-agent tasks(e.g.,Ant Sumo,Ant Battle and Humanoid Strike) as examples. The efficiency of our self-play framework has been tested on these tasks. After days of training,our agent can discover some interesting physical skills like pulling, jumping,etc. **Welcome to contribute your own environments!** ## Installation **** Download and follow the installation instructions of Isaac Gym: https://developer.nvidia.com/isaac-gym Ensure that Isaac Gym works on your system by running one of the examples from the `python/examples` directory, like `joint_monkey.py`. If you have any trouble running the samples, please follow troubleshooting steps described in the [Isaac Gym Preview Release 3/4 installation instructions](https://developer.nvidia.com/isaac-gym). Then install this repo: ```bash pip install -e . ``` ## Quick Start **** ### Tasks Source code for tasks can be found in `timechamber/tasks`,The detailed settings of state/action/reward are in [here](./docs/environments.md). More interesting tasks will come soon. #### Humanoid Strike Humanoid Strike is a 3D environment with two simulated humanoid physics characters. Each character is equipped with a sword and shield with 37 degrees-of-freedom. The game will be restarted if one agent goes outside the arena. We measure how much the player damaged the opponent and how much the player was damaged by the opponent in the terminated step to determine the winner.
#### Ant Sumo

Ant Sumo is a 3D environment with simulated physics that allows pairs of ant agents to compete against each other. To win, the agent has to push the opponent out of the ring. Every agent has 100 hp. Each step, if the agent's body touches the ground, its hp will be reduced by 1. The agent whose hp becomes 0 will be eliminated.
#### Ant Battle

Ant Battle is an expanded environment of Ant Sumo. It supports more than two agents competing against each other. The battle ring radius shrinks over time, and any agent going out of the ring will be eliminated.
### Self-Play Training To train your policy for tasks, for example: ```bash # run self-play training for Humanoid Strike task python train.py task=MA_Humanoid_Strike headless=True ``` ```bash # run self-play training for Ant Sumo task python train.py task=MA_Ant_Sumo train=MA_Ant_SumoPPO headless=True ``` ```bash # run self-play training for Ant Battle task python train.py task=MA_Ant_Battle train=MA_Ant_BattlePPO headless=True ``` Key arguments to the training script follow [IsaacGymEnvs Configuration and command line arguments](https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/blob/main/README.md#configuration-and-command-line-arguments) . Other training arguments follow [rl_games config parameters](https://github.com/Denys88/rl_games#config-parameters), you can change them in `timechamber/tasks/train/*.yaml`. There are some specific arguments for self-play training: - `num_agents`: Set the number of agents for Ant Battle environment, it should be larger than 1. - `op_checkpoint`: Set to path to the checkpoint to load initial opponent agent policy. If it's empty, opponent agent will use random policy. - `update_win_rate`: Win_rate threshold to add the current policy to opponent's player pool. - `player_pool_length`: The max size of player pool, following FIFO rules. - `games_to_check`: Warm up for training, the player pool won't be updated until the current policy plays such number of games. - `max_update_steps`: If current policy update iterations exceed that number, the current policy will be added to opponent player_pool. 
### Policies Evaluation To evaluate your policies, for example: ```bash # run testing for Ant Sumo policy python train.py task=MA_Ant_Sumo train=MA_Ant_SumoPPO test=True num_envs=4 minibatch_size=32 headless=False checkpoint='models/ant_sumo/policy.pth' ``` ```bash # run testing for Humanoid Strike policy python train.py task=MA_Humanoid_Strike train=MA_Humanoid_StrikeHRL test=True num_envs=4 minibatch_size=32 headless=False checkpoint='models/Humanoid_Strike/policy.pth' op_checkpoint='models/Humanoid_Strike/policy_op.pth' ``` You can set the opponent agent policy using `op_checkpoint`. If it's empty, the opponent agent will use the same policy as `checkpoint`. We use vectorized models to accelerate the evaluation of policies. Put policies into checkpoint dir, let them compete with each other in parallel: ```bash # run testing for Ant Sumo policy python train.py task=MA_Ant_Sumo train=MA_Ant_SumoPPO test=True headless=True checkpoint='models/ant_sumo' player_pool_type=vectorized ``` There are some specific arguments for self-play evaluation, you can change them in `timechamber/tasks/train/*.yaml`: - `games_num`: Total episode number of evaluation. - `record_elo`: Set `True` to record the ELO rating of your policies, after evaluation, you can check the `elo.jpg` in your checkpoint dir.
- `init_elo`: Initial ELO rating of each policy. ### Building Your Own Task You can build your own task follow [IsaacGymEnvs](https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/blob/main/README.md#creating-an-environment) , make sure the obs shape is correct and`info` contains `win`,`lose`and`draw`: ```python import isaacgym import timechamber import torch envs = timechamber.make( seed=0, task="MA_Ant_Sumo", num_envs=2, sim_device="cuda:0", rl_device="cuda:0", ) # the obs shape should be (num_agents*num_envs,num_obs). # the obs of training agent is (:num_envs,num_obs) print("Observation space is", envs.observation_space) print("Action space is", envs.action_space) obs = envs.reset() for _ in range(20): obs, reward, done, info = envs.step( torch.rand((2 * 2,) + envs.action_space.shape, device="cuda:0") ) # info: # {'win': tensor([Bool, Bool]) # 'lose': tensor([Bool, Bool]) # 'draw': tensor([Bool, Bool])} ``` ## Citing If you use timechamber in your research please use the following citation: ```` @misc{InspirAI, author = {Huang Ziming, Ziyi Liu, Wu Yutong, Flood Sung}, title = {TimeChamber: A Massively Parallel Large Scale Self-Play Framework}, year = {2022}, publisher = {GitHub}, journal = {GitHub repository}, howpublished = {\url{https://github.com/inspirai/TimeChamber}}, } ================================================ FILE: assets/mjcf/nv_ant.xml ================================================ ================================================ FILE: docs/environments.md ================================================ ## Environments We provide a detailed description of the environment here. ### Humanoid Strike Humanoid Strike is a 3D environment with two simulated humanoid physics characters. Each character is equipped with a sword and shield with 37 degrees-of-freedom. The game will be restarted if one agent goes outside the arena or the game reaches the maximum episode steps. 
We measure how much the player damaged the opponent and how much the player was damaged by the opponent in the terminated step to determine the winner. #### Low-Level Observation Space | Index | Description | |:-------:|:------------------------------:| | 0 | Height of the root from the ground. | | 1 - 48 | Position of the body in the character’s local coordinate frame. | | 49 - 150 | Rotation of the body in the character’s local coordinate frame. | | 151 - 201 | Linear velocity of the root in the character’s local coordinate frame. | | 202 - 252 | angular velocity of the root in the character’s local coordinate frame. | #### High-Level Observation Space | Index | Description | |:-------:|:------------------------------:| | 0 - 1 | relative distance from the borderline | | 2 - 4 | relative distance from the opponent | | 5 - 10 | Rotation of the opponent's root in the character’s local coordinate frame. | | 11 - 13 | Linear velocity of the opponent'root in the character’s local coordinate frame. | | 14 - 16 | angular velocity of the opponent'root in the character’s local coordinate frame. | | 17 - 19 | relative distance between ego agent and opponent's sword | | 20 - 22 | Linear velocity of the opponent' sword in the character’s local coordinate frame. 
| | 23 - 25 | relative distance between ego agent' shield and opponent's sword | | 26 - 28 | relative velocity between ego agent' shield and opponent's sword | | 29 - 31 | relative distance between ego agent' sword and opponent's torse | | 32 - 34 | relative velocity between ego agent' sword and opponent's torse | | 35 - 37 | relative distance between ego agent' sword and opponent's head | | 38 - 40 | relative velocity between ego agent' sword and opponent's head | | 41 - 43 | relative distance between ego agent' sword and opponent's right arm | | 44 - 46 | relative distance between ego agent' sword and opponent's right thigh | | 47 - 49 | relative distance between ego agent' sword and opponent's left thigh | #### Low-Level Action Space | Index | Description | |:-----:|:-----------------:| | 0 - 30 | target rotations of each character’s joints | #### High-Level Action Space | Index | Description | |:-----:|:-----------------:| | 0 - 63 | latent skill variables | #### Rewards The weights of reward components are as follows: ```python op_fall_reward_w = 200.0 ego_fall_out_reward_w = 50.0 shield_to_sword_pos_reward_w = 1.0 damage_reward_w = 8.0 sword_to_op_reward_w = 0.8 reward_energy_w = 3.0 reward_strike_vel_acc_w = 3.0 reward_face_w = 4.0 reward_foot_to_op_w = 10.0 reward_kick_w = 2.0 ``` ### Ant Sumo Ant Sumo is a 3D environment with simulated physics that allows pairs of ant agents to compete against each other. To win, the agent has to push the opponent out of the ring. Every agent has 100 hp . Each step, If the agent's body touches the ground, its hp will be reduced by 1.The agent whose hp becomes 0 will be eliminated. 
#### Observation Space | Index | Description | |:-------:|:------------------------------:| | 0 - 2 | self pose | | 3 - 6 | self rotation | | 7 - 9 | self linear velocity | | 10 - 12 | self angle velocity | | 13 - 20 | self dof pos | | 21 - 28 | self dof velocity | | 29 - 31 | opponent pose | | 32 - 35 | opponent rotation | | 36 - 37 | self-opponent pose vector(x,y) | | 38 | is self body touch ground | | 39 | is opponent body touch ground | #### Action Space | Index | Description | |:-----:|:-----------------:| | 0 - 7 | self dof position | #### Rewards The reward consists of two parts:sparse reward and dense reward. ```python win_reward = 2000 lose_penalty = -2000 draw_penalty = -1000 dense_reward_scale = 1. dof_at_limit_cost = torch.sum(obs_buf[:, 13:21] > 0.99, dim=-1) * joints_at_limit_cost_scale push_reward = -push_scale * torch.exp(-torch.linalg.norm(obs_buf_op[:, :2], dim=-1)) action_cost_penalty = torch.sum(torch.square(torques), dim=1) * action_cost_scale not_move_penalty = -10 * torch.exp(-torch.sum(torch.abs(torques), dim=1)) dense_reward = move_reward + dof_at_limit_cost + push_reward + action_cost_penalty + not_move_penalty total_reward = win_reward + lose_penalty + draw_penalty + dense_reward * dense_reward_scale ``` ### Ant Battle Ant Battle is an expanded environment of Ant Sumo. It supports more than two agents competing against with each other. The battle ring radius will shrink, the agent going out of the ring will be eliminated. 
#### Observation Space | Index | Description | |:-------:|:--------------------------------------:| | 0 - 2 | self pose | | 3 - 6 | self rotation | | 7 - 9 | self linear velocity | | 10 - 12 | self angle velocity | | 13 - 20 | self dof pos | | 21 - 28 | self dof velocity | | 29 | border radius-self dis to centre | | 30 | border radius | | 31 | is self body touch ground | | 32 - 34 | opponent_1 pose | | 35 - 38 | opponent_1 rotation | | 39 - 40 | self-opponent_1 pose vector(x,y) | | 41 - 48 | opponent_1 dof pose | | 49 - 56 | opponent_1 dof velocity | | 57 | border radius-opponent_1 dis to centre | | 58 | is opponent_1 body touch ground | | ... | ... | #### Action Space | Index | Description | |:-----:|:-----------------:| | 0 - 7 | self dof position | #### Rewards The reward consists of two parts:sparse reward and dense reward. ```python win_reward_scale = 2000 reward_per_rank = 2 * win_reward_scale / (num_agents - 1) sparse_reward = sparse_reward * (win_reward_scale - (nxt_rank[:, 0] - 1) * reward_per_rank) stay_in_center_reward = stay_in_center_reward_scale * torch.exp(-torch.linalg.norm(obs[0, :, :2], dim=-1)) dof_at_limit_cost = torch.sum(obs[0, :, 13:21] > 0.99, dim=-1) * joints_at_limit_cost_scale action_cost_penalty = torch.sum(torch.square(torques), dim=1) * action_cost_scale not_move_penalty = torch.exp(-torch.sum(torch.abs(torques), dim=1)) dense_reward = dof_at_limit_cost + action_cost_penalty + not_move_penalty + stay_in_center_reward total_reward = sparse_reward + dense_reward * dense_reward_scale ``` ================================================ FILE: setup.py ================================================ """Installation script for the 'timechamber' python package.""" from __future__ import absolute_import from __future__ import print_function from __future__ import division from setuptools import setup, find_packages import os root_dir = os.path.dirname(os.path.realpath(__file__)) # Minimum dependencies required prior to installation 
INSTALL_REQUIRES = [
    # RL
    "gym==0.24",
    "torch",
    "omegaconf",
    "termcolor",
    "dill",
    "hydra-core>=1.1",
    "rl-games==1.5.2",
    "pyvirtualdisplay",
    # multi-player ELO rating calculation, pinned to a fixed commit
    "multielo @ git+https://github.com/djcunningham0/multielo.git@440f7922b90ff87009f8283d6491eb0f704e6624",
    "matplotlib==3.5.2",
    "pytest==7.1.2",
]

# Installation operation
setup(
    name="timechamber",
    author="ZeldaHuang, Ziyi Liu",
    version="0.0.1",
    description="A Massively Parallel Large Scale Self-Play Framework",
    keywords=["robotics", "rl"],
    include_package_data=True,
    python_requires=">=3.6.*",
    install_requires=INSTALL_REQUIRES,
    packages=find_packages("."),
    classifiers=["Natural Language :: English", "Programming Language :: Python :: 3.7, 3.8"],
    zip_safe=False,
)

# EOF

================================================
FILE: timechamber/__init__.py
================================================
import hydra
from hydra import compose, initialize
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig, OmegaConf

from timechamber.utils.reformat import omegaconf_to_dict

# Custom OmegaConf resolvers referenced by the YAML configs under ./cfg.
OmegaConf.register_new_resolver('eq', lambda x, y: x.lower() == y.lower())
OmegaConf.register_new_resolver('contains', lambda x, y: x.lower() in y.lower())
OmegaConf.register_new_resolver('if', lambda pred, a, b: a if pred else b)
# Falls back to `default` when the override argument is the empty string.
OmegaConf.register_new_resolver('resolve_default', lambda default, arg: default if arg == '' else arg)


def make(
        seed: int,
        task: str,
        num_envs: int,
        sim_device: str,
        rl_device: str,
        graphics_device_id: int = -1,
        device_type: str = "cuda",
        headless: bool = False,
        multi_gpu: bool = False,
        virtual_screen_capture: bool = False,
        force_render: bool = True,
        cfg: DictConfig = None
):
    """Create and return a TimeChamber task environment.

    When ``cfg`` is None, a Hydra config is composed from ``./cfg`` with the
    given ``task`` override; otherwise the supplied config is reused as-is.

    Args:
        seed: RNG seed forwarded to the env creator.
        task: Task name used as the Hydra ``task=`` override. Replaced by the
            currently active Hydra task choice if Hydra is already initialized.
        num_envs: Number of parallel environments; written into the task
            config as ``env.numEnvs`` (only in the compose-from-scratch path).
        sim_device: Device string for the physics simulation (e.g. "cuda:0").
        rl_device: Device string for RL tensors; also written into the task dict.
        graphics_device_id: Graphics device id (-1 presumably disables
            rendering -- confirm against get_rlgames_env_creator).
        device_type: "cuda" or "cpu".
        headless: Run without a viewer window.
        multi_gpu: Enable multi-GPU mode.
        virtual_screen_capture: Capture frames from a virtual display.
        force_render: Force rendering each step.
        cfg: Optional pre-built OmegaConf config to reuse instead of composing.

    Returns:
        The environment instance produced by calling the creator returned by
        ``get_rlgames_env_creator``.
    """
    from timechamber.utils.rlgames_utils import get_rlgames_env_creator

    # create hydra config if no config passed in
    if cfg is None:
        # reset current hydra config if already parsed (but not passed in here)
        if HydraConfig.initialized():
            task = HydraConfig.get().runtime.choices['task']
            hydra.core.global_hydra.GlobalHydra.instance().clear()
        with initialize(config_path="./cfg"):
            cfg = compose(config_name="config", overrides=[f"task={task}"])
            task_dict = omegaconf_to_dict(cfg.task)
            # apply the caller-requested parallel env count
            task_dict['env']['numEnvs'] = num_envs
    # reuse existing config
    else:
        task_dict = omegaconf_to_dict(cfg.task)

    task_dict['seed'] = cfg.seed
    task_dict['rl_device'] = rl_device
    # optional motion-capture data override (presumably for AMP/ASE-style
    # humanoid tasks -- confirm against the task configs)
    if cfg.motion_file:
        task_dict['env']['motion_file'] = cfg.motion_file

    create_rlgpu_env = get_rlgames_env_creator(
        seed=seed,
        cfg=cfg,
        task_config=task_dict,
        task_name=task_dict["name"],
        sim_device=sim_device,
        rl_device=rl_device,
        graphics_device_id=graphics_device_id,
        headless=headless,
        device_type=device_type,
        multi_gpu=multi_gpu,
        virtual_screen_capture=virtual_screen_capture,
        force_render=force_render,
    )
    # instantiate the env immediately and hand it back
    return create_rlgpu_env()

================================================
FILE: timechamber/ase/ase_agent.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import torch
import torch.nn as nn

from isaacgym.torch_utils import *

from rl_games.algos_torch import torch_ext
from rl_games.common import a2c_common
from rl_games.algos_torch.running_mean_std import RunningMeanStd

from timechamber.ase import ase_network_builder
from timechamber.ase.utils import amp_agent


class ASEAgent(amp_agent.AMPAgent):
    """AMP agent extended with ASE latent skill variables.

    Keeps a per-env latent vector (``self._ase_latents``) that conditions the
    policy and critic, records the latents alongside each transition in the
    experience buffer, and mixes stochastic and deterministic action selection
    through a per-env random-action probability mask.
    """

    def __init__(self, base_name, config):
        super().__init__(base_name, config)
        return

    def init_tensors(self):
        """Allocate ASE latent buffers on top of the base AMP experience tensors."""
        super().init_tensors()
        batch_shape = self.experience_buffer.obs_base_shape
        # per-step latents recorded with the rollout (horizon x envs x latent_dim)
        self.experience_buffer.tensor_dict['ase_latents'] = torch.zeros(batch_shape + (self._latent_dim,),
                                                                        dtype=torch.float32,
                                                                        device=self.ppo_device)
        # current latent for each env (batch_shape[-1] is the env count here)
        self._ase_latents = torch.zeros((batch_shape[-1], self._latent_dim), dtype=torch.float32,
                                        device=self.ppo_device)
        self.tensor_list += ['ase_latents']
        # step index at which each env's latent is next resampled
        self._latent_reset_steps = torch.zeros(batch_shape[-1], dtype=torch.int32, device=self.ppo_device)
        num_envs = self.vec_env.env.task.num_envs
        env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.ppo_device)
        self._reset_latent_step_count(env_ids)
        return

    def play_steps(self):
        """Roll out ``horizon_length`` env steps and build the training batch.

        Each step refreshes the latents, queries the policy conditioned on
        them, steps the env, and records obs/actions/rewards/latents into the
        experience buffer. Afterwards AMP-style rewards are combined with the
        task rewards and advantages/returns are computed.
        """
        self.set_eval()
        epinfos = []
        done_indices = []
        update_list = self.update_list
        for n in range(self.horizon_length):
            self.obs = self.env_reset(done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])
            # resample latents for envs whose latent lifetime has elapsed
            self._update_latents()
            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, self._ase_latents, masks)
            else:
                res_dict = self.get_action_values(self.obs, self._ase_latents, self._rand_action_probs)
            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])
            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])
            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
            shaped_rewards = self.rewards_shaper(rewards)
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)
            self.experience_buffer.update_data('amp_obs', n, infos['amp_obs'])
            self.experience_buffer.update_data('ase_latents', n, self._ase_latents)
            self.experience_buffer.update_data('rand_action_mask', n, res_dict['rand_action_mask'])
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            # bootstrap value is zeroed for terminated envs
            next_vals = self._eval_critic(self.obs, self._ase_latents)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)
            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            # one entry per (multi-agent) env group
            done_indices = all_done_indices[::self.num_agents]
            self.game_rewards.update(self.current_rewards[done_indices])
            self.game_lengths.update(self.current_lengths[done_indices])
            self.algo_observer.process_infos(infos, done_indices)
            not_dones = 1.0 - self.dones.float()
            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones
            if (self.vec_env.env.task.viewer):
                self._amp_debug(infos, self._ase_latents)
            done_indices = done_indices[:, 0]

        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        mb_amp_obs = self.experience_buffer.tensor_dict['amp_obs']
        mb_ase_latents = self.experience_buffer.tensor_dict['ase_latents']
        # style/discriminator rewards conditioned on the latents, merged with task rewards
        amp_rewards = self._calc_amp_rewards(mb_amp_obs, mb_ase_latents)
        mb_rewards = self._combine_rewards(mb_rewards, amp_rewards)
        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values
        batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size
        for k, v in amp_rewards.items():
            batch_dict[k] = a2c_common.swap_and_flatten01(v)
        return batch_dict

    def get_action_values(self, obs_dict, ase_latents, rand_action_probs):
        """Query the policy conditioned on ``ase_latents``.

        ``rand_action_probs`` gives each env's probability of keeping the
        sampled (stochastic) action; envs drawn as 0 fall back to the
        deterministic mean action (``mus``). The chosen mask is returned in
        ``res_dict['rand_action_mask']``.
        """
        processed_obs = self._preproc_obs(obs_dict['obs'])
        self.model.eval()
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs': processed_obs,
            'rnn_states': self.rnn_states,
            'ase_latents': ase_latents
        }
        with torch.no_grad():
            res_dict = self.model(input_dict)
            if self.has_central_value:
                states = obs_dict['states']
                input_dict = {
                    'is_train': False,
                    'states': states,
                }
                value = self.get_central_value(input_dict)
                res_dict['values'] = value
        if self.normalize_value:
            res_dict['values'] = self.value_mean_std(res_dict['values'], True)
        rand_action_mask = torch.bernoulli(rand_action_probs)
        det_action_mask = rand_action_mask == 0.0
        # replace sampled actions with the deterministic means where masked out
        res_dict['actions'][det_action_mask] = res_dict['mus'][det_action_mask]
        res_dict['rand_action_mask'] = rand_action_mask
        return res_dict

    def prepare_dataset(self, batch_dict):
        """Extend the base dataset with the rollout's ASE latents."""
        super().prepare_dataset(batch_dict)
        ase_latents = batch_dict['ase_latents']
        self.dataset.values_dict['ase_latents'] = ase_latents
        return

    def calc_gradients(self, input_dict):
        """Compute PPO + AMP/ASE losses for one minibatch.

        NOTE(review): this method continues beyond the visible chunk; only the
        prefix shown here is documented.
        """
        self.set_train()
        value_preds_batch = input_dict['old_values']
        old_action_log_probs_batch = input_dict['old_logp_actions']
        advantage = input_dict['advantages']
        old_mu_batch = input_dict['mu']
        old_sigma_batch = input_dict['sigma']
        return_batch = input_dict['returns']
        actions_batch = input_dict['actions']
        obs_batch = input_dict['obs']
        obs_batch = self._preproc_obs(obs_batch)
        # discriminator/encoder inputs are truncated to the AMP minibatch size
        amp_obs = input_dict['amp_obs'][0:self._amp_minibatch_size]
        amp_obs = self._preproc_amp_obs(amp_obs)
        if (self._enable_enc_grad_penalty()):
            # gradients w.r.t. the encoder input are needed for the penalty term
            amp_obs.requires_grad_(True)
        amp_obs_replay = input_dict['amp_obs_replay'][0:self._amp_minibatch_size]
        amp_obs_replay = self._preproc_amp_obs(amp_obs_replay)
        amp_obs_demo = input_dict['amp_obs_demo'][0:self._amp_minibatch_size]
        amp_obs_demo = self._preproc_amp_obs(amp_obs_demo)
        amp_obs_demo.requires_grad_(True)
        ase_latents = input_dict['ase_latents']
        rand_action_mask = input_dict['rand_action_mask']
        rand_action_sum = torch.sum(rand_action_mask)
        lr = self.last_lr
        kl = 1.0
        lr_mul = 1.0
        curr_e_clip = lr_mul * self.e_clip
        batch_dict = {
            'is_train': True,
            'prev_actions': actions_batch,
            'obs': obs_batch,
            'amp_obs': amp_obs,
            'amp_obs_replay': amp_obs_replay,
            'amp_obs_demo': amp_obs_demo,
            'ase_latents': ase_latents
        }
        rnn_masks = None
        if self.is_rnn:
            rnn_masks = input_dict['rnn_masks']
            batch_dict['rnn_states'] = input_dict['rnn_states']
            batch_dict['seq_length'] = self.seq_len
        # NOTE(review): the rnn_masks block above is repeated verbatim below;
        # the duplicate looks redundant -- confirm against upstream ASE code.
        rnn_masks = None
        if self.is_rnn:
            rnn_masks = input_dict['rnn_masks']
            batch_dict['rnn_states'] = input_dict['rnn_states']
            batch_dict['seq_length'] = self.seq_len
        with torch.cuda.amp.autocast(enabled=self.mixed_precision):
            res_dict = self.model(batch_dict)
            action_log_probs = res_dict['prev_neglogp']
            values = res_dict['values']
            entropy = res_dict['entropy']
            mu = res_dict['mus']
            sigma = res_dict['sigmas']
            disc_agent_logit = res_dict['disc_agent_logit']
            disc_agent_replay_logit = res_dict['disc_agent_replay_logit']
            disc_demo_logit = res_dict['disc_demo_logit']
            enc_pred = res_dict['enc_pred']
            a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip)
            a_loss = a_info['actor_loss']
            a_clipped = a_info['actor_clipped'].float()
            c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value)
            c_loss = c_info['critic_loss']
            b_loss = self.bound_loss(mu)
            c_loss = 
torch.mean(c_loss) a_loss = torch.sum(rand_action_mask * a_loss) / rand_action_sum entropy = torch.sum(rand_action_mask * entropy) / rand_action_sum b_loss = torch.sum(rand_action_mask * b_loss) / rand_action_sum a_clip_frac = torch.sum(rand_action_mask * a_clipped) / rand_action_sum disc_agent_cat_logit = torch.cat([disc_agent_logit, disc_agent_replay_logit], dim=0) disc_info = self._disc_loss(disc_agent_cat_logit, disc_demo_logit, amp_obs_demo) disc_loss = disc_info['disc_loss'] enc_latents = batch_dict['ase_latents'][0:self._amp_minibatch_size] enc_loss_mask = rand_action_mask[0:self._amp_minibatch_size] enc_info = self._enc_loss(enc_pred, enc_latents, batch_dict['amp_obs'], enc_loss_mask) enc_loss = enc_info['enc_loss'] loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss \ + self._disc_coef * disc_loss + self._enc_coef * enc_loss if (self._enable_amp_diversity_bonus()): diversity_loss = self._diversity_loss(batch_dict['obs'], mu, batch_dict['ase_latents']) diversity_loss = torch.sum(rand_action_mask * diversity_loss) / rand_action_sum loss += self._amp_diversity_bonus * diversity_loss a_info['amp_diversity_loss'] = diversity_loss a_info['actor_loss'] = a_loss a_info['actor_clip_frac'] = a_clip_frac c_info['critic_loss'] = c_loss if self.multi_gpu: self.optimizer.zero_grad() else: for param in self.model.parameters(): param.grad = None self.scaler.scale(loss).backward() #TODO: Refactor this ugliest code of the year if self.truncate_grads: if self.multi_gpu: self.optimizer.synchronize() self.scaler.unscale_(self.optimizer) nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm) with self.optimizer.skip_synchronize(): self.scaler.step(self.optimizer) self.scaler.update() else: self.scaler.unscale_(self.optimizer) nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm) self.scaler.step(self.optimizer) self.scaler.update() else: self.scaler.step(self.optimizer) self.scaler.update() with 
torch.no_grad(): reduce_kl = not self.is_rnn kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl) if self.is_rnn: kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel() #/ sum_mask self.train_result = { 'entropy': entropy, 'kl': kl_dist, 'last_lr': self.last_lr, 'lr_mul': lr_mul, 'b_loss': b_loss } self.train_result.update(a_info) self.train_result.update(c_info) self.train_result.update(disc_info) self.train_result.update(enc_info) return def env_reset(self, env_ids=None): obs = super().env_reset(env_ids) if (env_ids is None): num_envs = self.vec_env.env.task.num_envs env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.ppo_device) if (len(env_ids) > 0): self._reset_latents(env_ids) self._reset_latent_step_count(env_ids) return obs def _reset_latent_step_count(self, env_ids): self._latent_reset_steps[env_ids] = torch.randint_like(self._latent_reset_steps[env_ids], low=self._latent_steps_min, high=self._latent_steps_max) return def _load_config_params(self, config): super()._load_config_params(config) self._latent_dim = config['latent_dim'] self._latent_steps_min = config.get('latent_steps_min', np.inf) self._latent_steps_max = config.get('latent_steps_max', np.inf) self._latent_dim = config['latent_dim'] self._amp_diversity_bonus = config['amp_diversity_bonus'] self._amp_diversity_tar = config['amp_diversity_tar'] self._enc_coef = config['enc_coef'] self._enc_weight_decay = config['enc_weight_decay'] self._enc_reward_scale = config['enc_reward_scale'] self._enc_grad_penalty = config['enc_grad_penalty'] self._enc_reward_w = config['enc_reward_w'] return def _build_net_config(self): config = super()._build_net_config() config['ase_latent_shape'] = (self._latent_dim,) return config def _reset_latents(self, env_ids): n = len(env_ids) z = self._sample_latents(n) self._ase_latents[env_ids] = z if (self.vec_env.env.task.viewer): self._change_char_color(env_ids) return def _sample_latents(self, n): z = 
self.model.a2c_network.sample_latents(n) return z def _update_latents(self): new_latent_envs = self._latent_reset_steps <= self.vec_env.env.task.progress_buf need_update = torch.any(new_latent_envs) if (need_update): new_latent_env_ids = new_latent_envs.nonzero(as_tuple=False).flatten() self._reset_latents(new_latent_env_ids) self._latent_reset_steps[new_latent_env_ids] += torch.randint_like(self._latent_reset_steps[new_latent_env_ids], low=self._latent_steps_min, high=self._latent_steps_max) if (self.vec_env.env.task.viewer): self._change_char_color(new_latent_env_ids) return def _eval_actor(self, obs, ase_latents): output = self.model.eval_actor(obs=obs, ase_latents=ase_latents) return output def _eval_critic(self, obs_dict, ase_latents): self.model.eval() obs = obs_dict['obs'] processed_obs = self._preproc_obs(obs) value = self.model.eval_critic(processed_obs, ase_latents) if self.normalize_value: value = self.value_mean_std(value, True) return value def _calc_amp_rewards(self, amp_obs, ase_latents): disc_r = self._calc_disc_rewards(amp_obs) enc_r = self._calc_enc_rewards(amp_obs, ase_latents) output = { 'disc_rewards': disc_r, 'enc_rewards': enc_r } return output def _calc_enc_rewards(self, amp_obs, ase_latents): with torch.no_grad(): enc_pred = self._eval_enc(amp_obs) err = self._calc_enc_error(enc_pred, ase_latents) enc_r = torch.clamp_min(-err, 0.0) enc_r *= self._enc_reward_scale return enc_r def _enc_loss(self, enc_pred, ase_latent, enc_obs, loss_mask): enc_err = self._calc_enc_error(enc_pred, ase_latent) #mask_sum = torch.sum(loss_mask) #enc_err = enc_err.squeeze(-1) #enc_loss = torch.sum(loss_mask * enc_err) / mask_sum enc_loss = torch.mean(enc_err) # weight decay if (self._enc_weight_decay != 0): enc_weights = self.model.a2c_network.get_enc_weights() enc_weights = torch.cat(enc_weights, dim=-1) enc_weight_decay = torch.sum(torch.square(enc_weights)) enc_loss += self._enc_weight_decay * enc_weight_decay enc_info = { 'enc_loss': enc_loss } if 
(self._enable_enc_grad_penalty()): enc_obs_grad = torch.autograd.grad(enc_err, enc_obs, grad_outputs=torch.ones_like(enc_err), create_graph=True, retain_graph=True, only_inputs=True) enc_obs_grad = enc_obs_grad[0] enc_obs_grad = torch.sum(torch.square(enc_obs_grad), dim=-1) #enc_grad_penalty = torch.sum(loss_mask * enc_obs_grad) / mask_sum enc_grad_penalty = torch.mean(enc_obs_grad) enc_loss += self._enc_grad_penalty * enc_grad_penalty enc_info['enc_grad_penalty'] = enc_grad_penalty.detach() return enc_info def _diversity_loss(self, obs, action_params, ase_latents): assert(self.model.a2c_network.is_continuous) n = obs.shape[0] assert(n == action_params.shape[0]) new_z = self._sample_latents(n) mu, sigma = self._eval_actor(obs=obs, ase_latents=new_z) clipped_action_params = torch.clamp(action_params, -1.0, 1.0) clipped_mu = torch.clamp(mu, -1.0, 1.0) a_diff = clipped_action_params - clipped_mu a_diff = torch.mean(torch.square(a_diff), dim=-1) z_diff = new_z * ase_latents z_diff = torch.sum(z_diff, dim=-1) z_diff = 0.5 - 0.5 * z_diff diversity_bonus = a_diff / (z_diff + 1e-5) diversity_loss = torch.square(self._amp_diversity_tar - diversity_bonus) return diversity_loss def _calc_enc_error(self, enc_pred, ase_latent): err = enc_pred * ase_latent err = -torch.sum(err, dim=-1, keepdim=True) return err def _enable_enc_grad_penalty(self): return self._enc_grad_penalty != 0 def _enable_amp_diversity_bonus(self): return self._amp_diversity_bonus != 0 def _eval_enc(self, amp_obs): proc_amp_obs = self._preproc_amp_obs(amp_obs) return self.model.a2c_network.eval_enc(proc_amp_obs) def _combine_rewards(self, task_rewards, amp_rewards): disc_r = amp_rewards['disc_rewards'] enc_r = amp_rewards['enc_rewards'] combined_rewards = self._task_reward_w * task_rewards \ + self._disc_reward_w * disc_r \ + self._enc_reward_w * enc_r return combined_rewards def _record_train_batch_info(self, batch_dict, train_info): super()._record_train_batch_info(batch_dict, train_info) 
train_info['enc_rewards'] = batch_dict['enc_rewards'] return def _log_train_info(self, train_info, frame): super()._log_train_info(train_info, frame) self.writer.add_scalar('losses/enc_loss', torch_ext.mean_list(train_info['enc_loss']).item(), frame) if (self._enable_amp_diversity_bonus()): self.writer.add_scalar('losses/amp_diversity_loss', torch_ext.mean_list(train_info['amp_diversity_loss']).item(), frame) enc_reward_std, enc_reward_mean = torch.std_mean(train_info['enc_rewards']) self.writer.add_scalar('info/enc_reward_mean', enc_reward_mean.item(), frame) self.writer.add_scalar('info/enc_reward_std', enc_reward_std.item(), frame) if (self._enable_enc_grad_penalty()): self.writer.add_scalar('info/enc_grad_penalty', torch_ext.mean_list(train_info['enc_grad_penalty']).item(), frame) return def _change_char_color(self, env_ids): base_col = np.array([0.4, 0.4, 0.4]) range_col = np.array([0.0706, 0.149, 0.2863]) range_sum = np.linalg.norm(range_col) rand_col = np.random.uniform(0.0, 1.0, size=3) rand_col = range_sum * rand_col / np.linalg.norm(rand_col) rand_col += base_col self.vec_env.env.task.set_char_color(rand_col, env_ids) return def _amp_debug(self, info, ase_latents): with torch.no_grad(): amp_obs = info['amp_obs'] amp_obs = amp_obs ase_latents = ase_latents disc_pred = self._eval_disc(amp_obs) amp_rewards = self._calc_amp_rewards(amp_obs, ase_latents) disc_reward = amp_rewards['disc_rewards'] enc_reward = amp_rewards['enc_rewards'] disc_pred = disc_pred.detach().cpu().numpy()[0, 0] disc_reward = disc_reward.cpu().numpy()[0, 0] enc_reward = enc_reward.cpu().numpy()[0, 0] print("disc_pred: ", disc_pred, disc_reward, enc_reward) return ================================================ FILE: timechamber/ase/ase_models.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from timechamber.ase.utils import amp_models


class ModelASEContinuous(amp_models.ModelAMPContinuous):
    """AMP continuous-action model extended with an ASE skill encoder head."""

    def __init__(self, network):
        super().__init__(network)
        return

    def build(self, config):
        """Build the 'ase' network and wrap it in this model's Network class."""
        net = self.network_builder.build('ase', **config)
        # Debug: list parameter names of the freshly built network.
        for name, _ in net.named_parameters():
            print(name)
        # print(f"ASE config: {config}")
        obs_shape = config['input_shape']
        normalize_value = config.get('normalize_value', False)
        normalize_input = config.get('normalize_input', False)
        value_size = config.get('value_size', 1)
        return ModelASEContinuous.Network(net, obs_shape=obs_shape, normalize_value=normalize_value,
                                          normalize_input=normalize_input, value_size=value_size)

    class Network(amp_models.ModelAMPContinuous.Network):
        """Runtime wrapper adding encoder predictions and latent-conditioned eval."""

        def __init__(self, a2c_network, obs_shape, normalize_value, normalize_input, value_size):
            super().__init__(a2c_network, obs_shape=obs_shape, normalize_value=normalize_value,
                             normalize_input=normalize_input, value_size=value_size)
            return

        def forward(self, input_dict):
            """Standard AMP forward; in training mode also returns 'enc_pred'."""
            is_train = input_dict.get('is_train', True)
            result = super().forward(input_dict)

            if (is_train):
                amp_obs = input_dict['amp_obs']
                enc_pred = self.a2c_network.eval_enc(amp_obs)
                result["enc_pred"] = enc_pred

            return result

        def eval_actor(self, obs, ase_latents, use_hidden_latents=False):
            """Normalize obs and evaluate the latent-conditioned actor head."""
            processed_obs = self.norm_obs(obs)
            mu, sigma = self.a2c_network.eval_actor(obs=processed_obs, ase_latents=ase_latents)
            return mu, sigma

        def eval_critic(self, obs, ase_latents, use_hidden_latents=False):
            """Normalize obs and evaluate the latent-conditioned critic head."""
            processed_obs = self.norm_obs(obs)
            value = self.a2c_network.eval_critic(processed_obs, ase_latents, use_hidden_latents)
            return value


================================================
FILE: timechamber/ase/ase_network_builder.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1.
Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import layers
from rl_games.algos_torch import network_builder

import torch
import torch.nn as nn
import numpy as np
import enum

from timechamber.ase.utils import amp_network_builder

# Encoder output layer weights are initialized uniformly in this range.
ENC_LOGIT_INIT_SCALE = 0.1


class LatentType(enum.Enum):
    # Supported latent prior shapes (only 'sphere' sampling is implemented below).
    uniform = 0
    sphere = 1


class ASEBuilder(amp_network_builder.AMPBuilder):
    """AMP network builder extended with a latent-conditioned actor/critic
    and an ASE skill encoder head."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        return

    class Network(amp_network_builder.AMPBuilder.Network):
        def __init__(self, params, **kwargs):
            actions_num = kwargs.get('actions_num')
            input_shape = kwargs.get('input_shape')
            self.value_size = kwargs.get('value_size', 1)
            self.num_seqs = num_seqs = kwargs.get('num_seqs', 1)
            amp_input_shape = kwargs.get('amp_input_shape')
            self._ase_latent_shape = kwargs.get('ase_latent_shape')

            # Skip AMPBuilder.Network.__init__ and build everything here,
            # since the actor/critic trunks must take the latent as input.
            network_builder.NetworkBuilder.BaseNetwork.__init__(self)
            self.load(params)

            actor_out_size, critic_out_size = self._build_actor_critic_net(input_shape, self._ase_latent_shape)

            self.value = torch.nn.Linear(critic_out_size, self.value_size)
            self.value_act = self.activations_factory.create(self.value_activation)

            if self.is_discrete:
                self.logits = torch.nn.Linear(actor_out_size, actions_num)
            '''
                for multidiscrete actions num is a tuple
            '''
            if self.is_multi_discrete:
                self.logits = torch.nn.ModuleList([torch.nn.Linear(actor_out_size, num) for num in actions_num])
            if self.is_continuous:
                self.mu = torch.nn.Linear(actor_out_size, actions_num)
                self.mu_act = self.activations_factory.create(self.space_config['mu_activation'])
                mu_init = self.init_factory.create(**self.space_config['mu_init'])
                self.sigma_act = self.activations_factory.create(self.space_config['sigma_activation'])
                sigma_init = self.init_factory.create(**self.space_config['sigma_init'])

                if (not self.space_config['learn_sigma']):
                    self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32),
                                              requires_grad=False)
                elif self.space_config['fixed_sigma']:
                    self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32),
                                              requires_grad=True)
                else:
                    self.sigma = torch.nn.Linear(actor_out_size, actions_num)

            mlp_init = self.init_factory.create(**self.initializer)
            if self.has_cnn:
                cnn_init = self.init_factory.create(**self.cnn['initializer'])

            for m in self.modules():
                if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d):
                    cnn_init(m.weight)
                    if getattr(m, "bias", None) is not None:
                        torch.nn.init.zeros_(m.bias)
                if isinstance(m, nn.Linear):
                    mlp_init(m.weight)
                    if getattr(m, "bias", None) is not None:
                        torch.nn.init.zeros_(m.bias)

            # Re-run the trunks' own initializers (the generic loop above
            # already touched their Linear layers).
            self.actor_mlp.init_params()
            self.critic_mlp.init_params()

            if self.is_continuous:
                mu_init(self.mu.weight)
                if self.space_config['fixed_sigma']:
                    sigma_init(self.sigma)
                else:
                    sigma_init(self.sigma.weight)

            self._build_disc(amp_input_shape)
            self._build_enc(amp_input_shape)

            return

        def load(self, params):
            """Read encoder-specific settings in addition to the AMP params."""
            super().load(params)

            self._enc_units = params['enc']['units']
            self._enc_activation = params['enc']['activation']
            self._enc_initializer = params['enc']['initializer']
            self._enc_separate = params['enc']['separate']

            return

        def forward(self, obs_dict):
            # NOTE(review): tuple concatenation assumes eval_actor returns a
            # tuple (continuous case: (mu, sigma)) — confirm for discrete use.
            obs = obs_dict['obs']
            ase_latents = obs_dict['ase_latents']
            states = obs_dict.get('rnn_states', None)
            use_hidden_latents = obs_dict.get('use_hidden_latents', False)

            actor_outputs = self.eval_actor(obs, ase_latents, use_hidden_latents)
            value = self.eval_critic(obs, ase_latents, use_hidden_latents)

            output = actor_outputs + (value, states)

            return output

        def eval_critic(self, obs, ase_latents, use_hidden_latents=False):
            """Critic value for (obs, latent)."""
            c_out = self.critic_cnn(obs)
            c_out = c_out.contiguous().view(c_out.size(0), -1)

            c_out = self.critic_mlp(c_out, ase_latents, use_hidden_latents)
            value = self.value_act(self.value(c_out))
            return value

        def eval_actor(self, obs, ase_latents, use_hidden_latents=False):
            """Actor outputs for (obs, latent): logits, logit list, or (mu, sigma)."""
            a_out = self.actor_cnn(obs)
            a_out = a_out.contiguous().view(a_out.size(0), -1)
            a_out = self.actor_mlp(a_out, ase_latents, use_hidden_latents)

            if self.is_discrete:
                logits = self.logits(a_out)
                return logits

            if self.is_multi_discrete:
                logits = [logit(a_out) for logit in self.logits]
                return logits

            if self.is_continuous:
                mu = self.mu_act(self.mu(a_out))
                if self.space_config['fixed_sigma']:
                    sigma = mu * 0.0 + self.sigma_act(self.sigma)
                else:
                    sigma = self.sigma_act(self.sigma(a_out))

                return mu, sigma
            return

        def get_enc_weights(self):
            """Flattened weights of the encoder MLP + output layer (for weight decay)."""
            weights = []
            for m in self._enc_mlp.modules():
                if isinstance(m, nn.Linear):
                    weights.append(torch.flatten(m.weight))

            weights.append(torch.flatten(self._enc.weight))
            return weights

        def _build_actor_critic_net(self, input_shape, ase_latent_shape):
            """Build the latent-conditioned actor (style-cat) and critic trunks."""
            style_units = [512, 256]
            style_dim = ase_latent_shape[-1]

            self.actor_cnn = nn.Sequential()
            self.critic_cnn = nn.Sequential()

            act_fn = self.activations_factory.create(self.activation)
            initializer = self.init_factory.create(**self.initializer)

            self.actor_mlp = AMPStyleCatNet1(obs_size=input_shape[-1],
                                             ase_latent_size=ase_latent_shape[-1],
                                             units=self.units,
                                             activation=act_fn,
                                             style_units=style_units,
                                             style_dim=style_dim,
                                             initializer=initializer)

            if self.separate:
                self.critic_mlp = AMPMLPNet(obs_size=input_shape[-1],
                                            ase_latent_size=ase_latent_shape[-1],
                                            units=self.units,
                                            activation=act_fn,
                                            initializer=initializer)

            actor_out_size = self.actor_mlp.get_out_size()
            critic_out_size = self.critic_mlp.get_out_size()

            return actor_out_size, critic_out_size

        def _build_enc(self, input_shape):
            """Build the encoder head, optionally sharing the disc's trunk."""
            if (self._enc_separate):
                self._enc_mlp = nn.Sequential()
                mlp_args = {
                    'input_size': input_shape[0],
                    'units': self._enc_units,
                    'activation': self._enc_activation,
                    'dense_func': torch.nn.Linear
                }
                self._enc_mlp = self._build_mlp(**mlp_args)

                mlp_init = self.init_factory.create(**self._enc_initializer)
                for m in self._enc_mlp.modules():
                    if isinstance(m, nn.Linear):
                        mlp_init(m.weight)
                        if getattr(m, "bias", None) is not None:
                            torch.nn.init.zeros_(m.bias)
            else:
                # Share the discriminator trunk.
                self._enc_mlp = self._disc_mlp

            # Second-to-last module of the (Linear, activation, ...) sequence
            # is the final Linear; its width feeds the encoder output layer.
            mlp_out_layer = list(self._enc_mlp.modules())[-2]
            mlp_out_size = mlp_out_layer.out_features
            self._enc = torch.nn.Linear(mlp_out_size, self._ase_latent_shape[-1])

            torch.nn.init.uniform_(self._enc.weight, -ENC_LOGIT_INIT_SCALE, ENC_LOGIT_INIT_SCALE)
            torch.nn.init.zeros_(self._enc.bias)

            return

        def eval_enc(self, amp_obs):
            """Encoder prediction, L2-normalized onto the unit hypersphere."""
            enc_mlp_out = self._enc_mlp(amp_obs)
            enc_output = self._enc(enc_mlp_out)
            enc_output = torch.nn.functional.normalize(enc_output, dim=-1)
            return enc_output

        def sample_latents(self, n):
            """Sample n latents uniformly on the unit hypersphere
            (normalized Gaussian draws)."""
            device = next(self._enc.parameters()).device
            z = torch.normal(torch.zeros([n, self._ase_latent_shape[-1]], device=device))
            z = torch.nn.functional.normalize(z, dim=-1)
            return z

    def build(self, name, **kwargs):
        net = ASEBuilder.Network(self.params, **kwargs)
        return net


class AMPMLPNet(torch.nn.Module):
    """Plain MLP over the concatenation of observation and latent."""

    def __init__(self, obs_size, ase_latent_size, units, activation, initializer):
        super().__init__()

        input_size = obs_size + ase_latent_size
        print('build amp mlp net:', input_size)

        self._units = units
        self._initializer = initializer
        self._mlp = []

        in_size = input_size
        for i in range(len(units)):
            unit = units[i]
            curr_dense = torch.nn.Linear(in_size, unit)
            self._mlp.append(curr_dense)
            self._mlp.append(activation)
            in_size = unit

        self._mlp = nn.Sequential(*self._mlp)
        self.init_params()
        return

    def forward(self, obs, latent, skip_style):
        # skip_style is accepted for interface parity but not used here.
        inputs = [obs, latent]
        input = torch.cat(inputs, dim=-1)
        output = self._mlp(input)
        return output

    def init_params(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                self._initializer(m.weight)
                if getattr(m, "bias", None) is not None:
                    torch.nn.init.zeros_(m.bias)
        return

    def get_out_size(self):
        out_size = self._units[-1]
        return out_size


class AMPStyleCatNet1(torch.nn.Module):
    """MLP whose input is obs concatenated with a 'style' embedding of the
    latent (latent -> style MLP -> tanh-activated style vector)."""

    def __init__(self, obs_size, ase_latent_size, units, activation,
                 style_units, style_dim, initializer):
        super().__init__()

        print('build amp style cat net:', obs_size, ase_latent_size)

        self._activation = activation
        self._initializer = initializer
        self._dense_layers = []
        self._units = units
        self._style_dim = style_dim
        self._style_activation = torch.tanh

        self._style_mlp = self._build_style_mlp(style_units, ase_latent_size)
        self._style_dense = torch.nn.Linear(style_units[-1], style_dim)

        in_size = obs_size + style_dim
        for i in range(len(units)):
            unit = units[i]
            out_size = unit
            curr_dense = torch.nn.Linear(in_size, out_size)
            self._dense_layers.append(curr_dense)

            in_size = out_size

        self._dense_layers = nn.ModuleList(self._dense_layers)

        self.init_params()
        return

    def forward(self, obs, latent, skip_style):
        # When skip_style is set the caller passes an already-embedded style.
        if (skip_style):
            style = latent
        else:
            style = self.eval_style(latent)

        h = torch.cat([obs, style], dim=-1)

        for i in range(len(self._dense_layers)):
            curr_dense = self._dense_layers[i]
            h = curr_dense(h)
            h = self._activation(h)

        return h

    def eval_style(self, latent):
        """Map a latent to its tanh-bounded style vector."""
        style_h = self._style_mlp(latent)
        style = self._style_dense(style_h)
        style = self._style_activation(style)
        return style

    def init_params(self):
        scale_init_range = 1.0

        for m in self.modules():
            if isinstance(m, nn.Linear):
                self._initializer(m.weight)
                if getattr(m, "bias", None) is not None:
                    torch.nn.init.zeros_(m.bias)

        # Style output layer deliberately overridden with a wider uniform init.
        nn.init.uniform_(self._style_dense.weight, -scale_init_range, scale_init_range)
        return

    def get_out_size(self):
        out_size = self._units[-1]
        return out_size

    def _build_style_mlp(self, style_units, input_size):
        in_size = input_size
        layers = []
        for unit in style_units:
            layers.append(torch.nn.Linear(in_size, unit))
            layers.append(self._activation)
            in_size = unit

        enc_mlp = nn.Sequential(*layers)
        return enc_mlp


================================================
FILE: timechamber/ase/ase_players.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2.
Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from pytest import param  # NOTE(review): unused import, likely accidental
import torch
from isaacgym.torch_utils import *

from rl_games.algos_torch import players

from timechamber.ase.utils import amp_players
import timechamber.ase.ase_network_builder as ase_network_builder


class ASEPlayer(amp_players.AMPPlayerContinuous):
    """Inference-time player for an ASE (Adversarial Skill Embedding) policy.

    Extends the AMP player with a per-env latent skill vector that is
    periodically resampled; actions are conditioned on the current latents.
    """

    def __init__(self, params):
        # Latent configuration must be read before super().__init__, which
        # builds the network via _build_net_config (uses self._latent_dim).
        config = params['config']
        self._latent_dim = config['latent_dim']
        # Defaults of np.inf mean "never resample" — only valid if the config
        # supplies finite values, since np.random.randint cannot take inf.
        self._latent_steps_min = config.get('latent_steps_min', np.inf)
        self._latent_steps_max = config.get('latent_steps_max', np.inf)
        self._enc_reward_scale = config['enc_reward_scale']
        super().__init__(params)
        # One latent vector per environment.
        if (hasattr(self, 'env')) and self.env is not None:
            batch_size = self.env.task.num_envs
        else:
            batch_size = self.env_info['num_envs']
        self._ase_latents = torch.zeros((batch_size, self._latent_dim), dtype=torch.float32, device=self.device)
        return

    def run(self):
        # Arm the resample countdown before the base run loop starts.
        self._reset_latent_step_count()
        super().run()
        return

    def get_action(self, obs_dict, is_determenistic=False):
        """Return actions for the current obs, conditioned on the ASE latents.

        Latents are counted down / resampled once per call, so this must be
        invoked exactly once per environment step.
        """
        self._update_latents()
        obs = obs_dict['obs']
        if len(obs.size()) == len(self.obs_shape):
            obs = obs.unsqueeze(0)
        obs = self._preproc_obs(obs)
        ase_latents = self._ase_latents

        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs' : obs,
            'rnn_states' : self.states,
            'ase_latents': ase_latents
        }
        with torch.no_grad():
            res_dict = self.model(input_dict)
        mu = res_dict['mus']
        action = res_dict['actions']
        self.states = res_dict['rnn_states']
        # Deterministic eval uses the mean action instead of a sample.
        if is_determenistic:
            current_action = mu
        else:
            current_action = action

        current_action = torch.squeeze(current_action.detach())
        return players.rescale_actions(self.actions_low, self.actions_high, torch.clamp(current_action, -1.0, 1.0))

    def env_reset(self, env_ids=None):
        # Resample latents for the envs that were reset.
        obs = super().env_reset(env_ids)
        self._reset_latents(env_ids)
        return obs

    def _build_net_config(self):
        config = super()._build_net_config()
        config['ase_latent_shape'] = (self._latent_dim,)
        return config

    def _reset_latents(self, done_env_ids=None):
        """Draw fresh latents for the given envs (all envs when None)."""
        if (done_env_ids is None):
            num_envs = self.env.task.num_envs
            done_env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.device)

        rand_vals = self.model.a2c_network.sample_latents(len(done_env_ids))
        self._ase_latents[done_env_ids] = rand_vals
        self._change_char_color(done_env_ids)
        return

    def _update_latents(self):
        # Countdown; when it hits zero, resample all latents and re-arm.
        if (self._latent_step_count <= 0):
            self._reset_latents()
            self._reset_latent_step_count()
            if (self.env.task.viewer):
                print("Sampling new amp latents------------------------------")
                num_envs = self.env.task.num_envs
                env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.device)
                self._change_char_color(env_ids)
        else:
            self._latent_step_count -= 1
        return

    def _reset_latent_step_count(self):
        # Uniform random horizon in [min, max) before the next resample.
        self._latent_step_count = np.random.randint(self._latent_steps_min, self._latent_steps_max)
        return

    def _calc_amp_rewards(self, amp_obs, ase_latents):
        """Return dict with discriminator and encoder (skill) rewards."""
        disc_r = self._calc_disc_rewards(amp_obs)
        enc_r = self._calc_enc_rewards(amp_obs, ase_latents)
        output = {
            'disc_rewards': disc_r,
            'enc_rewards': enc_r
        }
        return output

    def _calc_enc_rewards(self, amp_obs, ase_latents):
        # Encoder reward: clipped negative encoder error, scaled.
        with torch.no_grad():
            enc_pred = self._eval_enc(amp_obs)
            err = self._calc_enc_error(enc_pred, ase_latents)
            enc_r = torch.clamp_min(-err, 0.0)
            enc_r *= self._enc_reward_scale
        return enc_r

    def _calc_enc_error(self, enc_pred, ase_latent):
        # Negative dot product between encoder prediction and latent.
        err = enc_pred * ase_latent
        err = -torch.sum(err, dim=-1, keepdim=True)
        return err

    def _eval_enc(self, amp_obs):
        proc_amp_obs = self._preproc_amp_obs(amp_obs)
        return self.model.a2c_network.eval_enc(proc_amp_obs)

    def _amp_debug(self, info):
        # Prints discriminator prediction and rewards for env 0 (viewer debug).
        with torch.no_grad():
            amp_obs = info['amp_obs']
            amp_obs = amp_obs
            ase_latents = self._ase_latents
            disc_pred = self._eval_disc(amp_obs)
            amp_rewards = self._calc_amp_rewards(amp_obs, ase_latents)
            disc_reward = amp_rewards['disc_rewards']
            enc_reward = amp_rewards['enc_rewards']

            disc_pred = disc_pred.detach().cpu().numpy()[0, 0]
            disc_reward = disc_reward.cpu().numpy()[0, 0]
            enc_reward = enc_reward.cpu().numpy()[0, 0]
            print("disc_pred: ", disc_pred, disc_reward, enc_reward)
        return

    def _change_char_color(self, env_ids):
        # Random character tint with magnitude matched to range_col's norm,
        # offset from a grey base — purely cosmetic viewer feedback.
        base_col = np.array([0.4, 0.4, 0.4])
        range_col = np.array([0.0706, 0.149, 0.2863])
        range_sum = np.linalg.norm(range_col)

        rand_col = np.random.uniform(0.0, 1.0, size=3)
        rand_col = range_sum * rand_col / np.linalg.norm(rand_col)
        rand_col += base_col
        self.env.task.set_char_color(rand_col, env_ids)
        return



================================================
FILE: timechamber/ase/hrl_agent.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import copy
from datetime import datetime
from distutils.command.config import config  # NOTE(review): unused import, likely auto-added by an IDE
from gym import spaces
import numpy as np
import os
import time
import yaml

from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import central_value
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common import a2c_common
from rl_games.common import datasets
from rl_games.common import schedulers
from rl_games.common import vecenv

import torch
from torch import optim

import timechamber.ase.utils.common_agent as common_agent
import timechamber.ase.ase_agent as ase_agent
import timechamber.ase.ase_models as ase_models
import timechamber.ase.ase_network_builder as ase_network_builder

from tensorboardX import SummaryWriter


class HRLAgent(common_agent.CommonAgent):
    """Hierarchical RL agent: a high-level policy outputs ASE latents that a
    frozen, pre-trained low-level controller (LLC) turns into motor actions.
    """

    def __init__(self, base_name, params):
        config = params['config']

        # The LLC's training config is needed before super().__init__ so the
        # high-level action space can be sized to the latent dimension.
        with open(os.path.join(os.getcwd(), config['llc_config']), 'r') as f:
            llc_config = yaml.load(f, Loader=yaml.SafeLoader)
            llc_config_params = llc_config['params']
            self._latent_dim = llc_config_params['config']['latent_dim']

        super().__init__(base_name, params)

        self._task_size = self.vec_env.env.task.get_task_obs_size()

        # Number of simulator steps executed per high-level decision.
        self._llc_steps = config['llc_steps']
        llc_checkpoint = config['llc_checkpoint']
        assert(llc_checkpoint != "")
        self._build_llc(llc_config_params, llc_checkpoint)

        return

    def env_step(self, actions):
        """Run _llc_steps low-level steps for one high-level action.

        Rewards (task and discriminator) are averaged over the sub-steps;
        done/terminate flags are OR-ed across them.
        """
        actions = self.preprocess_actions(actions)
        obs = self.obs['obs']

        rewards = 0.0
        disc_rewards = 0.0
        done_count = 0.0
        terminate_count = 0.0
        for t in range(self._llc_steps):
            llc_actions = self._compute_llc_action(obs, actions)
            obs_dict, curr_rewards, curr_dones, infos = self.vec_env.step(llc_actions)
            # TODO
            obs = obs_dict['obs']

            rewards += curr_rewards
            done_count += curr_dones
            terminate_count += infos['terminate']

            amp_obs = infos['amp_obs']
            curr_disc_reward = self._calc_disc_reward(amp_obs)
            disc_rewards += curr_disc_reward

        rewards /= self._llc_steps
        disc_rewards /= self._llc_steps

        # An env is done/terminated if it was so in ANY sub-step.
        dones = torch.zeros_like(done_count)
        dones[done_count > 0] = 1.0
        terminate = torch.zeros_like(terminate_count)
        terminate[terminate_count > 0] = 1.0
        infos['terminate'] = terminate
        infos['disc_rewards'] = disc_rewards

        if self.is_tensor_obses:
            if self.value_size == 1:
                rewards = rewards.unsqueeze(1)
            return self.obs_to_tensors(obs), rewards.to(self.ppo_device), dones.to(self.ppo_device), infos
        else:
            if self.value_size == 1:
                rewards = np.expand_dims(rewards, axis=1)
            return self.obs_to_tensors(obs), torch.from_numpy(rewards).to(self.ppo_device).float(), torch.from_numpy(dones).to(self.ppo_device), infos

    def cast_obs(self, obs):
        # Keep the LLC's tensor/ndarray mode in sync with ours.
        obs = super().cast_obs(obs)
        self._llc_agent.is_tensor_obses = self.is_tensor_obses
        return obs

    def preprocess_actions(self, actions):
        """Clamp high-level latents to [-1, 1]; to numpy if env isn't tensor-based."""
        clamped_actions = torch.clamp(actions, -1.0, 1.0)
        if not self.is_tensor_obses:
            clamped_actions = clamped_actions.cpu().numpy()
        return clamped_actions

    def play_steps(self):
        """Collect one horizon of high-level experience and compute returns.

        Mirrors CommonAgent.play_steps but also stores per-step discriminator
        rewards and mixes them into the rewards before GAE.
        """
        self.set_eval()

        epinfos = []
        done_indices = torch.tensor([], device=self.device, dtype=torch.long)
        update_list = self.update_list

        for n in range(self.horizon_length):
            self.obs = self.env_reset(done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])

            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                res_dict = self.get_action_values(self.obs)

            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])

            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])

            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
            shaped_rewards = self.rewards_shaper(rewards)
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)
            self.experience_buffer.update_data('disc_rewards', n, infos['disc_rewards'])

            # Bootstrap value is zeroed on true terminations (not timeouts).
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            next_vals = self._eval_critic(self.obs)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)

            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            done_indices = all_done_indices[::self.num_agents]

            self.game_rewards.update(self.current_rewards[done_indices])
            self.game_lengths.update(self.current_lengths[done_indices])
            self.algo_observer.process_infos(infos, done_indices)

            not_dones = 1.0 - self.dones.float()

            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones

            done_indices = done_indices[:, 0]

        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        mb_disc_rewards = self.experience_buffer.tensor_dict['disc_rewards']
        mb_rewards = self._combine_rewards(mb_rewards, mb_disc_rewards)

        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values

        batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size

        return batch_dict

    def _load_config_params(self, config):
        super()._load_config_params(config)

        # Mixing weights for task vs. discriminator (style) reward.
        self._task_reward_w = config['task_reward_w']
        self._disc_reward_w = config['disc_reward_w']
        return

    def _get_mean_rewards(self):
        # Rewards were averaged over llc_steps in env_step; scale back so
        # reported numbers reflect per-sim-step totals.
        rewards = super()._get_mean_rewards()
        rewards *= self._llc_steps
        return rewards

    def _setup_action_space(self):
        # The high-level policy acts in latent space, not motor space.
        super()._setup_action_space()
        self.actions_num = self._latent_dim
        return

    def init_tensors(self):
        super().init_tensors()

        # Re-shape action/mu/sigma buffers to the latent dimension.
        del self.experience_buffer.tensor_dict['actions']
        del self.experience_buffer.tensor_dict['mus']
        del self.experience_buffer.tensor_dict['sigmas']

        batch_shape = self.experience_buffer.obs_base_shape
        self.experience_buffer.tensor_dict['actions'] = torch.zeros(batch_shape + (self._latent_dim,), dtype=torch.float32, device=self.ppo_device)
        self.experience_buffer.tensor_dict['mus'] = torch.zeros(batch_shape + (self._latent_dim,), dtype=torch.float32, device=self.ppo_device)
        self.experience_buffer.tensor_dict['sigmas'] = torch.zeros(batch_shape + (self._latent_dim,), dtype=torch.float32, device=self.ppo_device)

        self.experience_buffer.tensor_dict['disc_rewards'] = torch.zeros_like(self.experience_buffer.tensor_dict['rewards'])
        self.tensor_list += ['disc_rewards']
        return

    def _build_llc(self, config_params, checkpoint_file):
        """Instantiate the frozen low-level ASE agent from a checkpoint."""
        llc_agent_config = self._build_llc_agent_config(config_params)

        self._llc_agent = ase_agent.ASEAgent('llc', llc_agent_config)
        self._llc_agent.restore(checkpoint_file)
        print("Loaded LLC checkpoint from {:s}".format(checkpoint_file))
        self._llc_agent.set_eval()
        return

    def _build_llc_agent_config(self, config_params, network=None):
        # The LLC observes only the proprioceptive part of the obs: strip the
        # task-specific slice off the end of the observation space.
        llc_env_info = copy.deepcopy(self.env_info)
        obs_space = llc_env_info['observation_space']
        obs_size = obs_space.shape[0]
        obs_size -= self._task_size
        llc_env_info['observation_space'] = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size])

        params = config_params
        params['config']['network'] = network
        params['config']['num_actors'] = self.num_actors
        params['config']['features'] = {'observer' : self.algo_observer}
        params['config']['env_info'] = llc_env_info
        params['config']['device'] = self.device

        return params

    def _compute_llc_action(self, obs, actions):
        """Map a high-level latent to a motor action via the LLC's mean policy."""
        llc_obs = self._extract_llc_obs(obs)
        processed_obs = self._llc_agent._preproc_obs(llc_obs)
        # ASE latents live on the unit hypersphere.
        z = torch.nn.functional.normalize(actions, dim=-1)
        mu, _ = self._llc_agent.model.eval_actor(obs=processed_obs, ase_latents=z)
        llc_action = mu
        llc_action = self._llc_agent.preprocess_actions(llc_action)

        return llc_action

    def _extract_llc_obs(self, obs):
        # Task observations are assumed appended at the end of obs.
        obs_size = obs.shape[-1]
        llc_obs = obs[..., :obs_size - self._task_size]
        return llc_obs

    def _calc_disc_reward(self, amp_obs):
        disc_reward = self._llc_agent._calc_disc_rewards(amp_obs)
        return disc_reward

    def _combine_rewards(self, task_rewards, disc_rewards):
        # NOTE(review): the stray leading '+' on the continuation line is a
        # harmless unary plus; the result is the intended weighted sum.
        combined_rewards = self._task_reward_w * task_rewards + \
                         + self._disc_reward_w * disc_rewards
        #combined_rewards = task_rewards * disc_rewards
        return combined_rewards

    def _record_train_batch_info(self, batch_dict, train_info):
        super()._record_train_batch_info(batch_dict, train_info)
        train_info['disc_rewards'] = batch_dict['disc_rewards']
        return

    def _log_train_info(self, train_info, frame):
        super()._log_train_info(train_info, frame)

        disc_reward_std, disc_reward_mean = torch.std_mean(train_info['disc_rewards'])
        self.writer.add_scalar('info/disc_reward_mean', disc_reward_mean.item(), frame)
        self.writer.add_scalar('info/disc_reward_std', disc_reward_std.item(), frame)
        return



================================================
FILE: timechamber/ase/hrl_models.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2.
Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import torch.nn as nn

from rl_games.algos_torch.models import ModelA2CContinuousLogStd


class ModelHRLContinuous(ModelA2CContinuousLogStd):
    """rl_games model wrapper for the HRL high-level continuous policy."""

    def __init__(self, network):
        super().__init__(network)
        return

    def build(self, config):
        """Build the underlying network and wrap it in our Network class."""
        net = self.network_builder.build('amp', **config)
        # Debug: list all parameter names of the freshly built network.
        for name, _ in net.named_parameters():
            print(name)
        # print(f"ASE config: {config}")
        obs_shape = config['input_shape']
        normalize_value = config.get('normalize_value', False)
        normalize_input = config.get('normalize_input', False)
        value_size = config.get('value_size', 1)
        return ModelHRLContinuous.Network(net, obs_shape=obs_shape,
                                          normalize_value=normalize_value,
                                          normalize_input=normalize_input,
                                          value_size=value_size)

    class Network(ModelA2CContinuousLogStd.Network):
        def __init__(self, a2c_network, obs_shape, normalize_value, normalize_input, value_size):
            super().__init__(a2c_network, obs_shape=obs_shape,
                             normalize_value=normalize_value,
                             normalize_input=normalize_input,
                             value_size=value_size)
            return

        def eval_critic(self, obs):
            """Value estimate for raw obs: normalize in, un-normalize out."""
            processed_obs = self.norm_obs(obs)
            value = self.a2c_network.eval_critic(processed_obs)
            values = self.unnorm_value(value)
            return values



================================================
FILE: timechamber/ase/hrl_network_builder.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3.
# Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from rl_games.algos_torch import network_builder

import torch
import torch.nn as nn

from timechamber.ase import ase_network_builder


class HRLBuilder(network_builder.A2CBuilder):
    """Network builder for the HRL high-level policy: a standard A2C network
    whose action mean is squashed with tanh and whose sigma may be fixed.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        return

    class Network(network_builder.A2CBuilder.Network):
        def __init__(self, params, **kwargs):
            super().__init__(params, **kwargs)

            # When sigma is not learned, replace it with a frozen parameter
            # initialized from the config's sigma_init spec.
            if self.is_continuous:
                if (not self.space_config['learn_sigma']):
                    actions_num = kwargs.get('actions_num')
                    sigma_init = self.init_factory.create(**self.space_config['sigma_init'])
                    self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32),
                                              requires_grad=False)
                    sigma_init(self.sigma)
            return

        def forward(self, obs_dict):
            # Squash the action mean into [-1, 1]; sigma/value pass through.
            mu, sigma, value, states = super().forward(obs_dict)
            norm_mu = torch.tanh(mu)
            return norm_mu, sigma, value, states

        def eval_critic(self, obs):
            """Critic-only forward pass (CNN -> flatten -> MLP -> value head)."""
            c_out = self.critic_cnn(obs)
            c_out = c_out.contiguous().view(c_out.size(0), -1)
            c_out = self.critic_mlp(c_out)
            value = self.value_act(self.value(c_out))
            return value

    def build(self, name, **kwargs):
        net = HRLBuilder.Network(self.params, **kwargs)
        return net



================================================
FILE: timechamber/ase/hrl_players.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import copy from gym import spaces import numpy as np import os import torch import yaml import time from rl_games.algos_torch import players from rl_games.algos_torch import torch_ext from rl_games.algos_torch.running_mean_std import RunningMeanStd from rl_games.common.player import BasePlayer import timechamber.ase.utils.common_player as common_player import timechamber.ase.ase_models as ase_models import timechamber.ase.ase_network_builder as ase_network_builder import timechamber.ase.ase_players as ase_players class HRLPlayer(common_player.CommonPlayer): def __init__(self, params): config = params['config'] with open(os.path.join(os.getcwd(), config['llc_config']), 'r') as f: llc_config = yaml.load(f, Loader=yaml.SafeLoader) llc_config_params = llc_config['params'] self._latent_dim = llc_config_params['config']['latent_dim'] super().__init__(params) self._task_size = self.env.task.get_task_obs_size() self._llc_steps = config['llc_steps'] llc_checkpoint = config['llc_checkpoint'] assert(llc_checkpoint != "") self._build_llc(llc_config_params, llc_checkpoint) return def get_action(self, obs_dict, is_determenistic = False): obs = obs_dict['obs'] if len(obs.size()) == len(self.obs_shape): obs = obs.unsqueeze(0) proc_obs = self._preproc_obs(obs) input_dict = { 'is_train': False, 'prev_actions': None, 'obs' : proc_obs, 'rnn_states' : self.states } with torch.no_grad(): res_dict = self.model(input_dict) mu = res_dict['mus'] action = res_dict['actions'] self.states = res_dict['rnn_states'] if is_determenistic: current_action = mu else: current_action = action current_action = torch.squeeze(current_action.detach()) clamped_actions = torch.clamp(current_action, -1.0, 1.0) return clamped_actions def run(self): n_games = self.games_num render = self.render_env n_game_life = self.n_game_life is_determenistic = self.is_determenistic sum_rewards = 0 sum_steps = 0 sum_game_res = 0 n_games = n_games * n_game_life games_played = 0 has_masks = False has_masks_func = 
getattr(self.env, "has_action_mask", None) is not None op_agent = getattr(self.env, "create_agent", None) if op_agent: agent_inited = True if has_masks_func: has_masks = self.env.has_action_mask() need_init_rnn = self.is_rnn for _ in range(n_games): if games_played >= n_games: break obs_dict = self.env_reset() batch_size = 1 if len(obs_dict['obs'].size()) > len(self.obs_shape): batch_size = obs_dict['obs'].size()[0] self.batch_size = batch_size if need_init_rnn: self.init_rnn() need_init_rnn = False cr = torch.zeros(batch_size, dtype=torch.float32) steps = torch.zeros(batch_size, dtype=torch.float32) print_game_res = False done_indices = [] for n in range(self.max_steps): obs_dict = self.env_reset(done_indices) if has_masks: masks = self.env.get_action_mask() action = self.get_masked_action(obs_dict, masks, is_determenistic) else: action = self.get_action(obs_dict, is_determenistic) obs_dict, r, done, info = self.env_step(self.env, obs_dict, action) cr += r steps += 1 self._post_step(info) if render: self.env.render(mode = 'human') time.sleep(self.render_sleep) all_done_indices = done.nonzero(as_tuple=False) done_indices = all_done_indices[::self.num_agents] done_count = len(done_indices) games_played += done_count if done_count > 0: if self.is_rnn: for s in self.states: s[:,all_done_indices,:] = s[:,all_done_indices,:] * 0.0 cur_rewards = cr[done_indices].sum().item() cur_steps = steps[done_indices].sum().item() cr = cr * (1.0 - done.float()) steps = steps * (1.0 - done.float()) sum_rewards += cur_rewards sum_steps += cur_steps game_res = 0.0 if isinstance(info, dict): if 'battle_won' in info: print_game_res = True game_res = info.get('battle_won', 0.5) if 'scores' in info: print_game_res = True game_res = info.get('scores', 0.5) if self.print_stats: if print_game_res: print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count, 'w:', game_res) else: print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count) sum_game_res += game_res if 
batch_size//self.num_agents == 1 or games_played >= n_games: break done_indices = done_indices[:, 0] print(sum_rewards) if print_game_res: print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life, 'winrate:', sum_game_res / games_played * n_game_life) else: print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life) return def env_step(self, env, obs_dict, action): if not self.is_tensor_obses: actions = actions.cpu().numpy() obs = obs_dict['obs'] rewards = 0.0 done_count = 0.0 disc_rewards = 0.0 for t in range(self._llc_steps): llc_actions = self._compute_llc_action(obs, action) obs, curr_rewards, curr_dones, infos = env.step(llc_actions) rewards += curr_rewards done_count += curr_dones amp_obs = infos['amp_obs'] curr_disc_reward = self._calc_disc_reward(amp_obs) curr_disc_reward = curr_disc_reward[0, 0].cpu().numpy() disc_rewards += curr_disc_reward rewards /= self._llc_steps dones = torch.zeros_like(done_count) dones[done_count > 0] = 1.0 disc_rewards /= self._llc_steps if isinstance(obs, dict): obs = obs['obs'] if obs.dtype == np.float64: obs = np.float32(obs) if self.value_size > 1: rewards = rewards[0] if self.is_tensor_obses: return obs, rewards.cpu(), dones.cpu(), infos else: if np.isscalar(dones): rewards = np.expand_dims(np.asarray(rewards), 0) dones = np.expand_dims(np.asarray(dones), 0) return torch.from_numpy(obs).to(self.device), torch.from_numpy(rewards), torch.from_numpy(dones), infos def _build_llc(self, config_params, checkpoint_file): llc_agent_config = self._build_llc_agent_config(config_params) self._llc_agent = ase_players.ASEPlayer(llc_agent_config) self._llc_agent.restore(checkpoint_file) print("Loaded LLC checkpoint from {:s}".format(checkpoint_file)) return def _build_llc_agent_config(self, config_params, network=None): llc_env_info = copy.deepcopy(self.env_info) obs_space = llc_env_info['observation_space'] obs_size = 
obs_space.shape[0] obs_size -= self._task_size llc_env_info['observation_space'] = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size]) llc_env_info['amp_observation_space'] = self.env.amp_observation_space.shape llc_env_info['num_envs'] = self.env.task.num_envs params = config_params params['config']['network'] = network params['config']['env_info'] = llc_env_info return params def _setup_action_space(self): super()._setup_action_space() self.actions_num = self._latent_dim return def _compute_llc_action(self, obs, actions): llc_obs = self._extract_llc_obs(obs) processed_obs = self._llc_agent._preproc_obs(llc_obs) z = torch.nn.functional.normalize(actions, dim=-1) mu, _ = self._llc_agent.model.eval_actor(obs=processed_obs, ase_latents=z) llc_action = players.rescale_actions(self.actions_low, self.actions_high, torch.clamp(mu, -1.0, 1.0)) return llc_action def _extract_llc_obs(self, obs): obs_size = obs.shape[-1] llc_obs = obs[..., :obs_size - self._task_size] return llc_obs def _calc_disc_reward(self, amp_obs): disc_reward = self._llc_agent._calc_disc_rewards(amp_obs) return disc_reward ================================================ FILE: timechamber/ase/utils/amp_agent.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. 
# Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.algos_torch import torch_ext
from rl_games.common import a2c_common
from rl_games.common import schedulers
from rl_games.common import vecenv

from isaacgym.torch_utils import *

import time
from datetime import datetime
import numpy as np
from torch import optim
import torch
from torch import nn

import timechamber.ase.utils.replay_buffer as replay_buffer
import timechamber.ase.utils.common_agent as common_agent

from tensorboardX import SummaryWriter


class AMPAgent(common_agent.CommonAgent):
    """Adversarial Motion Prior (AMP) PPO agent.

    Extends the common PPO agent with:
      * a discriminator trained to separate policy-generated AMP
        observations from reference-motion demos,
      * a style reward derived from the discriminator that is mixed
        with the task reward,
      * replay/demo buffers for discriminator training, and
      * an epsilon-greedy-style mixture of deterministic and stochastic
        rollout actions (see `_load_config_params`).
    """

    def __init__(self, base_name, params):
        super().__init__(base_name, params)

        # Running normalizer for discriminator inputs (shared by rollout
        # reward computation and discriminator training).
        if self._normalize_amp_input:
            self._amp_input_mean_std = RunningMeanStd(self._amp_observation_space.shape).to(self.ppo_device)
        return

    def init_tensors(self):
        """Allocate experience-buffer tensors, including AMP-specific ones."""
        super().init_tensors()
        self._build_amp_buffers()
        return

    def set_eval(self):
        # Freeze AMP input normalizer statistics during rollout collection.
        super().set_eval()
        if self._normalize_amp_input:
            self._amp_input_mean_std.eval()
        return

    def set_train(self):
        # Resume updating AMP input normalizer statistics during training.
        super().set_train()
        if self._normalize_amp_input:
            self._amp_input_mean_std.train()
        return

    def get_stats_weights(self):
        """Include AMP input-normalizer state in checkpoint stats."""
        state = super().get_stats_weights()
        if self._normalize_amp_input:
            state['amp_input_mean_std'] = self._amp_input_mean_std.state_dict()
        return state

    def set_stats_weights(self, weights):
        """Restore AMP input-normalizer state from checkpoint stats."""
        super().set_stats_weights(weights)
        if self._normalize_amp_input:
            self._amp_input_mean_std.load_state_dict(weights['amp_input_mean_std'])
        return

    def play_steps(self):
        """Collect one horizon of experience and assemble the training batch.

        In addition to the standard PPO rollout, records per-step AMP
        observations and the random-action mask, then mixes the
        discriminator (style) reward into the task reward before
        computing advantages/returns.
        """
        self.set_eval()

        epinfos = []  # NOTE(review): never populated/used below.
        done_indices = []
        update_list = self.update_list

        for n in range(self.horizon_length):
            self.obs = self.env_reset(done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])

            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                # Per-env probability of acting stochastically vs deterministically.
                res_dict = self.get_action_values(self.obs, self._rand_action_probs)

            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])

            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])

            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
            shaped_rewards = self.rewards_shaper(rewards)
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)
            self.experience_buffer.update_data('amp_obs', n, infos['amp_obs'])
            self.experience_buffer.update_data('rand_action_mask', n, res_dict['rand_action_mask'])

            # Bootstrap value is zeroed on true terminations (not timeouts).
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            next_vals = self._eval_critic(self.obs)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)

            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            done_indices = all_done_indices[::self.num_agents]

            self.game_rewards.update(self.current_rewards[done_indices])
            self.game_lengths.update(self.current_lengths[done_indices])
            self.algo_observer.process_infos(infos, done_indices)

            not_dones = 1.0 - self.dones.float()

            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones

            if (self.vec_env.env.task.viewer):
                self._amp_debug(infos)

            done_indices = done_indices[:, 0]

        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']

        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        mb_amp_obs = self.experience_buffer.tensor_dict['amp_obs']
        # Style reward from the discriminator, blended with the task reward.
        amp_rewards = self._calc_amp_rewards(mb_amp_obs)
        mb_rewards = self._combine_rewards(mb_rewards, amp_rewards)

        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values

        batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size

        # Expose per-component AMP rewards (e.g. 'disc_rewards') for logging.
        for k, v in amp_rewards.items():
            batch_dict[k] = a2c_common.swap_and_flatten01(v)

        return batch_dict

    def get_action_values(self, obs_dict, rand_action_probs):
        """Query the policy, then overwrite actions with the deterministic
        mean for envs whose Bernoulli draw selected deterministic mode.

        rand_action_probs: per-env probability of keeping the sampled
        (stochastic) action; the resulting 0/1 mask is returned as
        'rand_action_mask' and later gates the PPO losses.
        """
        processed_obs = self._preproc_obs(obs_dict['obs'])

        self.model.eval()
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs' : processed_obs,
            'rnn_states' : self.rnn_states
        }

        with torch.no_grad():
            res_dict = self.model(input_dict)
            if self.has_central_value:
                states = obs_dict['states']
                input_dict = {
                    'is_train': False,
                    'states' : states,
                }
                value = self.get_central_value(input_dict)
                res_dict['values'] = value

        if self.normalize_value:
            res_dict['values'] = self.value_mean_std(res_dict['values'], True)

        rand_action_mask = torch.bernoulli(rand_action_probs)
        det_action_mask = rand_action_mask == 0.0
        # Deterministic envs act with the distribution mean.
        res_dict['actions'][det_action_mask] = res_dict['mus'][det_action_mask]
        res_dict['rand_action_mask'] = rand_action_mask

        return res_dict

    def prepare_dataset(self, batch_dict):
        """Forward AMP tensors (agent/demo/replay obs, action mask) to the dataset."""
        super().prepare_dataset(batch_dict)
        self.dataset.values_dict['amp_obs'] = batch_dict['amp_obs']
        self.dataset.values_dict['amp_obs_demo'] = batch_dict['amp_obs_demo']
        self.dataset.values_dict['amp_obs_replay'] = batch_dict['amp_obs_replay']

        rand_action_mask = batch_dict['rand_action_mask']
        self.dataset.values_dict['rand_action_mask'] = rand_action_mask
        return

    def train_epoch(self):
        """One training epoch: rollout, refresh demo/replay AMP batches,
        run mini-epoch PPO+discriminator updates, then push this epoch's
        agent AMP observations into the replay buffer.
        """
        play_time_start = time.time()

        with torch.no_grad():
            if self.is_rnn:
                batch_dict = self.play_steps_rnn()
            else:
                batch_dict = self.play_steps()

        play_time_end = time.time()
        update_time_start = time.time()
        rnn_masks = batch_dict.get('rnn_masks', None)

        self._update_amp_demos()
        num_obs_samples = batch_dict['amp_obs'].shape[0]
        amp_obs_demo = self._amp_obs_demo_buffer.sample(num_obs_samples)['amp_obs']
        batch_dict['amp_obs_demo'] = amp_obs_demo

        # Before the replay buffer has any content, fall back to the
        # freshly collected agent observations.
        if (self._amp_replay_buffer.get_total_count() == 0):
            batch_dict['amp_obs_replay'] = batch_dict['amp_obs']
        else:
            batch_dict['amp_obs_replay'] = self._amp_replay_buffer.sample(num_obs_samples)['amp_obs']

        self.set_train()

        self.curr_frames = batch_dict.pop('played_frames')
        self.prepare_dataset(batch_dict)
        self.algo_observer.after_steps()

        if self.has_central_value:
            self.train_central_value()

        train_info = None

        if self.is_rnn:
            frames_mask_ratio = rnn_masks.sum().item() / (rnn_masks.nelement())
            print(frames_mask_ratio)

        for _ in range(0, self.mini_epochs_num):
            ep_kls = []  # NOTE(review): collected nowhere; unused.
            for i in range(len(self.dataset)):
                curr_train_info = self.train_actor_critic(self.dataset[i])

                if self.schedule_type == 'legacy':
                    if self.multi_gpu:
                        curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls')
                    self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, curr_train_info['kl'].item())
                    self.update_lr(self.last_lr)

                # Accumulate per-minibatch metrics into lists per key.
                if (train_info is None):
                    train_info = dict()
                    for k, v in curr_train_info.items():
                        train_info[k] = [v]
                else:
                    for k, v in curr_train_info.items():
                        train_info[k].append(v)

            av_kls = torch_ext.mean_list(train_info['kl'])

            if self.schedule_type == 'standard':
                if self.multi_gpu:
                    av_kls = self.hvd.average_value(av_kls, 'ep_kls')
                self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item())
                self.update_lr(self.last_lr)

        if self.schedule_type == 'standard_epoch':
            if self.multi_gpu:
                # NOTE(review): `kls` is undefined here — this branch raises
                # NameError if schedule_type == 'standard_epoch' with multi_gpu.
                av_kls = self.hvd.average_value(torch_ext.mean_list(kls), 'ep_kls')
            self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item())
            self.update_lr(self.last_lr)

        update_time_end = time.time()
        play_time = play_time_end - play_time_start
        update_time = update_time_end - update_time_start
        total_time = update_time_end - play_time_start

        self._store_replay_amp_obs(batch_dict['amp_obs'])

        train_info['play_time'] = play_time
        train_info['update_time'] = update_time
        train_info['total_time'] = total_time
        self._record_train_batch_info(batch_dict, train_info)

        return train_info

    def calc_gradients(self, input_dict):
        """Compute the combined PPO + discriminator loss and step the optimizer.

        Actor/entropy/bound losses are masked by 'rand_action_mask' so
        only stochastically acted samples contribute; the critic loss
        uses all samples. Results are stored in self.train_result.
        """
        self.set_train()

        value_preds_batch = input_dict['old_values']
        old_action_log_probs_batch = input_dict['old_logp_actions']
        advantage = input_dict['advantages']
        old_mu_batch = input_dict['mu']
        old_sigma_batch = input_dict['sigma']
        return_batch = input_dict['returns']
        actions_batch = input_dict['actions']
        obs_batch = input_dict['obs']
        obs_batch = self._preproc_obs(obs_batch)

        # Discriminator minibatches are a (possibly smaller) prefix slice.
        amp_obs = input_dict['amp_obs'][0:self._amp_minibatch_size]
        amp_obs = self._preproc_amp_obs(amp_obs)
        amp_obs_replay = input_dict['amp_obs_replay'][0:self._amp_minibatch_size]
        amp_obs_replay = self._preproc_amp_obs(amp_obs_replay)

        amp_obs_demo = input_dict['amp_obs_demo'][0:self._amp_minibatch_size]
        amp_obs_demo = self._preproc_amp_obs(amp_obs_demo)
        # Needed for the gradient penalty in _disc_loss.
        amp_obs_demo.requires_grad_(True)

        rand_action_mask = input_dict['rand_action_mask']
        rand_action_sum = torch.sum(rand_action_mask)

        lr = self.last_lr  # NOTE(review): unused local.
        kl = 1.0           # NOTE(review): unused local.
        lr_mul = 1.0
        curr_e_clip = lr_mul * self.e_clip

        batch_dict = {
            'is_train': True,
            'prev_actions': actions_batch,
            'obs' : obs_batch,
            'amp_obs' : amp_obs,
            'amp_obs_replay' : amp_obs_replay,
            'amp_obs_demo' : amp_obs_demo
        }

        rnn_masks = None
        if self.is_rnn:
            rnn_masks = input_dict['rnn_masks']
            batch_dict['rnn_states'] = input_dict['rnn_states']
            batch_dict['seq_length'] = self.seq_len

        with torch.cuda.amp.autocast(enabled=self.mixed_precision):
            res_dict = self.model(batch_dict)
            action_log_probs = res_dict['prev_neglogp']
            values = res_dict['values']
            entropy = res_dict['entropy']
            mu = res_dict['mus']
            sigma = res_dict['sigmas']
            disc_agent_logit = res_dict['disc_agent_logit']
            disc_agent_replay_logit = res_dict['disc_agent_replay_logit']
            disc_demo_logit = res_dict['disc_demo_logit']

            a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip)
            a_loss = a_info['actor_loss']
            a_clipped = a_info['actor_clipped'].float()

            c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value)
            c_loss = c_info['critic_loss']

            b_loss = self.bound_loss(mu)

            # Policy-side terms averaged only over stochastically acted samples.
            c_loss = torch.mean(c_loss)
            a_loss = torch.sum(rand_action_mask * a_loss) / rand_action_sum
            entropy = torch.sum(rand_action_mask * entropy) / rand_action_sum
            b_loss = torch.sum(rand_action_mask * b_loss) / rand_action_sum
            a_clip_frac = torch.sum(rand_action_mask * a_clipped) / rand_action_sum

            # Negatives for the discriminator: fresh agent obs + replayed obs.
            disc_agent_cat_logit = torch.cat([disc_agent_logit, disc_agent_replay_logit], dim=0)
            disc_info = self._disc_loss(disc_agent_cat_logit, disc_demo_logit, amp_obs_demo)
            disc_loss = disc_info['disc_loss']

            loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss \
                 + self._disc_coef * disc_loss

            a_info['actor_loss'] = a_loss
            a_info['actor_clip_frac'] = a_clip_frac
            c_info['critic_loss'] = c_loss

            if self.multi_gpu:
                self.optimizer.zero_grad()
            else:
                for param in self.model.parameters():
                    param.grad = None

        self.scaler.scale(loss).backward()

        #TODO: Refactor this ugliest code of the year
        if self.truncate_grads:
            if self.multi_gpu:
                self.optimizer.synchronize()
                self.scaler.unscale_(self.optimizer)
                nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
                with self.optimizer.skip_synchronize():
                    self.scaler.step(self.optimizer)
                    self.scaler.update()
            else:
                self.scaler.unscale_(self.optimizer)
                nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
                self.scaler.step(self.optimizer)
                self.scaler.update()
        else:
            self.scaler.step(self.optimizer)
            self.scaler.update()

        with torch.no_grad():
            reduce_kl = not self.is_rnn
            kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl)
            if self.is_rnn:
                kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel()  #/ sum_mask

        self.train_result = {
            'entropy': entropy,
            'kl': kl_dist,
            'last_lr': self.last_lr,
            'lr_mul': lr_mul,
            'b_loss': b_loss
        }
        self.train_result.update(a_info)
        self.train_result.update(c_info)
        self.train_result.update(disc_info)

        return

    def _load_config_params(self, config):
        """Read AMP-specific hyperparameters from the train config."""
        super()._load_config_params(config)

        # When eps greedy is enabled, rollouts will be generated using a mixture of
        # deterministic and stochastic actions. The deterministic actions help to
        # produce smoother, less noisy, motions that can be used to train a better
        # discriminator. If the discriminator is only trained with jittery motions
        # from noisy actions, it can learn to home in on the jitteriness to
        # differentiate between real and fake samples.
        self._enable_eps_greedy = bool(config['enable_eps_greedy'])
        self._task_reward_w = config['task_reward_w']
        self._disc_reward_w = config['disc_reward_w']

        self._amp_observation_space = self.env_info['amp_observation_space']
        self._amp_batch_size = int(config['amp_batch_size'])
        self._amp_minibatch_size = int(config['amp_minibatch_size'])
        assert(self._amp_minibatch_size <= self.minibatch_size)

        self._disc_coef = config['disc_coef']
        self._disc_logit_reg = config['disc_logit_reg']
        self._disc_grad_penalty = config['disc_grad_penalty']
        self._disc_weight_decay = config['disc_weight_decay']
        self._disc_reward_scale = config['disc_reward_scale']
        self._normalize_amp_input = config.get('normalize_amp_input', True)
        return

    def _build_net_config(self):
        """Extend the network config with the discriminator input shape."""
        config = super()._build_net_config()
        config['amp_input_shape'] = self._amp_observation_space.shape
        return config

    def _build_rand_action_probs(self):
        """Assign each env a probability of acting stochastically.

        Probabilities decay exponentially with env index: env 0 is fully
        stochastic (1.0), the last env fully deterministic (0.0). With
        eps-greedy disabled, all envs act stochastically.
        """
        num_envs = self.vec_env.env.task.num_envs
        env_ids = to_torch(np.arange(num_envs), dtype=torch.float32, device=self.ppo_device)

        self._rand_action_probs = 1.0 - torch.exp(10 * (env_ids / (num_envs - 1.0) - 1.0))
        self._rand_action_probs[0] = 1.0
        self._rand_action_probs[-1] = 0.0

        if not self._enable_eps_greedy:
            self._rand_action_probs[:] = 1.0

        return

    def _init_train(self):
        super()._init_train()
        self._init_amp_demo_buf()
        return

    def _disc_loss(self, disc_agent_logit, disc_demo_logit, obs_demo):
        """Discriminator loss: BCE prediction terms plus logit
        regularization, gradient penalty on demos, and optional weight decay.

        obs_demo must have requires_grad=True (set by the caller) for the
        gradient-penalty term.
        """
        # prediction loss
        disc_loss_agent = self._disc_loss_neg(disc_agent_logit)
        disc_loss_demo = self._disc_loss_pos(disc_demo_logit)
        disc_loss = 0.5 * (disc_loss_agent + disc_loss_demo)

        # logit reg
        logit_weights = self.model.a2c_network.get_disc_logit_weights()
        disc_logit_loss = torch.sum(torch.square(logit_weights))
        disc_loss += self._disc_logit_reg * disc_logit_loss

        # grad penalty
        disc_demo_grad = torch.autograd.grad(disc_demo_logit, obs_demo, grad_outputs=torch.ones_like(disc_demo_logit),
                                             create_graph=True, retain_graph=True, only_inputs=True)
        disc_demo_grad = disc_demo_grad[0]
        disc_demo_grad = torch.sum(torch.square(disc_demo_grad), dim=-1)
        disc_grad_penalty = torch.mean(disc_demo_grad)
        disc_loss += self._disc_grad_penalty * disc_grad_penalty

        # weight decay
        if (self._disc_weight_decay != 0):
            disc_weights = self.model.a2c_network.get_disc_weights()
            disc_weights = torch.cat(disc_weights, dim=-1)
            disc_weight_decay = torch.sum(torch.square(disc_weights))
            disc_loss += self._disc_weight_decay * disc_weight_decay

        disc_agent_acc, disc_demo_acc = self._compute_disc_acc(disc_agent_logit, disc_demo_logit)

        disc_info = {
            'disc_loss': disc_loss,
            'disc_grad_penalty': disc_grad_penalty.detach(),
            'disc_logit_loss': disc_logit_loss.detach(),
            'disc_agent_acc': disc_agent_acc.detach(),
            'disc_demo_acc': disc_demo_acc.detach(),
            'disc_agent_logit': disc_agent_logit.detach(),
            'disc_demo_logit': disc_demo_logit.detach()
        }
        return disc_info

    def _disc_loss_neg(self, disc_logits):
        # Agent samples should be classified as fake (target 0).
        bce = torch.nn.BCEWithLogitsLoss()
        loss = bce(disc_logits, torch.zeros_like(disc_logits))
        return loss

    def _disc_loss_pos(self, disc_logits):
        # Demo samples should be classified as real (target 1).
        bce = torch.nn.BCEWithLogitsLoss()
        loss = bce(disc_logits, torch.ones_like(disc_logits))
        return loss

    def _compute_disc_acc(self, disc_agent_logit, disc_demo_logit):
        """Classification accuracy at the logit-0 decision boundary."""
        agent_acc = disc_agent_logit < 0
        agent_acc = torch.mean(agent_acc.float())
        demo_acc = disc_demo_logit > 0
        demo_acc = torch.mean(demo_acc.float())
        return agent_acc, demo_acc

    def _fetch_amp_obs_demo(self, num_samples):
        """Sample reference-motion AMP observations from the environment."""
        amp_obs_demo = self.vec_env.env.fetch_amp_obs_demo(num_samples)
        return amp_obs_demo

    def _build_amp_buffers(self):
        """Allocate AMP experience tensors and the demo/replay buffers."""
        batch_shape = self.experience_buffer.obs_base_shape
        self.experience_buffer.tensor_dict['amp_obs'] = torch.zeros(batch_shape + self._amp_observation_space.shape,
                                                                    device=self.ppo_device)
        self.experience_buffer.tensor_dict['rand_action_mask'] = torch.zeros(batch_shape, dtype=torch.float32,
                                                                             device=self.ppo_device)

        amp_obs_demo_buffer_size = int(self.config['amp_obs_demo_buffer_size'])
        self._amp_obs_demo_buffer = replay_buffer.ReplayBuffer(amp_obs_demo_buffer_size, self.ppo_device)

        self._amp_replay_keep_prob = self.config['amp_replay_keep_prob']
        replay_buffer_size = int(self.config['amp_replay_buffer_size'])
        self._amp_replay_buffer = replay_buffer.ReplayBuffer(replay_buffer_size, self.ppo_device)
        self._build_rand_action_probs()

        self.tensor_list += ['amp_obs', 'rand_action_mask']
        return

    def _init_amp_demo_buf(self):
        """Pre-fill the demo buffer with reference-motion observations."""
        buffer_size = self._amp_obs_demo_buffer.get_buffer_size()
        num_batches = int(np.ceil(buffer_size / self._amp_batch_size))

        for i in range(num_batches):
            curr_samples = self._fetch_amp_obs_demo(self._amp_batch_size)
            self._amp_obs_demo_buffer.store({'amp_obs': curr_samples})

        return

    def _update_amp_demos(self):
        """Push one fresh batch of demo observations each epoch."""
        new_amp_obs_demo = self._fetch_amp_obs_demo(self._amp_batch_size)
        self._amp_obs_demo_buffer.store({'amp_obs': new_amp_obs_demo})
        return

    def _preproc_amp_obs(self, amp_obs):
        # Apply the running mean/std normalizer when enabled.
        if self._normalize_amp_input:
            amp_obs = self._amp_input_mean_std(amp_obs)
        return amp_obs

    def _combine_rewards(self, task_rewards, amp_rewards):
        """Weighted sum of task reward and discriminator style reward."""
        disc_r = amp_rewards['disc_rewards']
        # NOTE(review): the '+ \ +' below is a stray unary plus (harmless,
        # result unchanged) left over from a reformat.
        combined_rewards = self._task_reward_w * task_rewards + \
            + self._disc_reward_w * disc_r
        return combined_rewards

    def _eval_disc(self, amp_obs):
        """Discriminator logits for (normalized) AMP observations."""
        proc_amp_obs = self._preproc_amp_obs(amp_obs)
        return self.model.a2c_network.eval_disc(proc_amp_obs)

    def _calc_advs(self, batch_dict):
        """Advantages, optionally normalized only over stochastic samples."""
        returns = batch_dict['returns']
        values = batch_dict['values']
        rand_action_mask = batch_dict['rand_action_mask']

        advantages = returns - values
        advantages = torch.sum(advantages, axis=1)
        if self.normalize_advantage:
            advantages = torch_ext.normalization_with_masks(advantages, rand_action_mask)

        return advantages

    def _calc_amp_rewards(self, amp_obs):
        disc_r = self._calc_disc_rewards(amp_obs)
        output = {
            'disc_rewards': disc_r
        }
        return output

    def _calc_disc_rewards(self, amp_obs):
        """Style reward r = -log(max(1 - sigmoid(logit), 1e-4)) * scale."""
        with torch.no_grad():
            disc_logits = self._eval_disc(amp_obs)
            prob = 1 / (1 + torch.exp(-disc_logits))
            # Clamp avoids log(0) when the discriminator is saturated.
            disc_r = -torch.log(torch.maximum(1 - prob, torch.tensor(0.0001, device=self.ppo_device)))
            disc_r *= self._disc_reward_scale
        return disc_r

    def _store_replay_amp_obs(self, amp_obs):
        """Insert agent AMP observations into the replay buffer.

        Once the buffer has wrapped, each sample is kept only with
        probability `_amp_replay_keep_prob`; an oversized batch is then
        randomly subsampled to the buffer capacity.
        """
        buf_size = self._amp_replay_buffer.get_buffer_size()
        buf_total_count = self._amp_replay_buffer.get_total_count()
        if (buf_total_count > buf_size):
            keep_probs = to_torch(np.array([self._amp_replay_keep_prob] * amp_obs.shape[0]), device=self.ppo_device)
            keep_mask = torch.bernoulli(keep_probs) == 1.0
            amp_obs = amp_obs[keep_mask]

        if (amp_obs.shape[0] > buf_size):
            rand_idx = torch.randperm(amp_obs.shape[0])
            rand_idx = rand_idx[:buf_size]
            amp_obs = amp_obs[rand_idx]

        self._amp_replay_buffer.store({'amp_obs': amp_obs})
        return

    def _record_train_batch_info(self, batch_dict, train_info):
        super()._record_train_batch_info(batch_dict, train_info)
        train_info['disc_rewards'] = batch_dict['disc_rewards']
        return

    def _log_train_info(self, train_info, frame):
        """Log discriminator losses/metrics to TensorBoard."""
        super()._log_train_info(train_info, frame)

        self.writer.add_scalar('losses/disc_loss', torch_ext.mean_list(train_info['disc_loss']).item(), frame)

        self.writer.add_scalar('info/disc_agent_acc', torch_ext.mean_list(train_info['disc_agent_acc']).item(), frame)
        self.writer.add_scalar('info/disc_demo_acc', torch_ext.mean_list(train_info['disc_demo_acc']).item(), frame)
        self.writer.add_scalar('info/disc_agent_logit', torch_ext.mean_list(train_info['disc_agent_logit']).item(), frame)
        self.writer.add_scalar('info/disc_demo_logit', torch_ext.mean_list(train_info['disc_demo_logit']).item(), frame)
        self.writer.add_scalar('info/disc_grad_penalty', torch_ext.mean_list(train_info['disc_grad_penalty']).item(), frame)
        self.writer.add_scalar('info/disc_logit_loss', torch_ext.mean_list(train_info['disc_logit_loss']).item(), frame)

        disc_reward_std, disc_reward_mean = torch.std_mean(train_info['disc_rewards'])
        self.writer.add_scalar('info/disc_reward_mean', disc_reward_mean.item(), frame)
        self.writer.add_scalar('info/disc_reward_std', disc_reward_std.item(), frame)
        return

    def _amp_debug(self, info):
        """Print discriminator prediction/reward for env 0 (viewer debugging)."""
        with torch.no_grad():
            amp_obs = info['amp_obs']
            amp_obs = amp_obs[0:1]
            disc_pred = self._eval_disc(amp_obs)
            amp_rewards = self._calc_amp_rewards(amp_obs)
            disc_reward = amp_rewards['disc_rewards']

            disc_pred = disc_pred.detach().cpu().numpy()[0, 0]
            disc_reward = disc_reward.cpu().numpy()[0, 0]
            print("disc_pred: ", disc_pred, disc_reward)
        return


================================================
FILE: timechamber/ase/utils/amp_datasets.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch from rl_games.common import datasets class AMPDataset(datasets.PPODataset): def __init__(self, batch_size, minibatch_size, is_discrete, is_rnn, device, seq_len): super().__init__(batch_size, minibatch_size, is_discrete, is_rnn, device, seq_len) self._idx_buf = torch.randperm(batch_size) return def update_mu_sigma(self, mu, sigma): raise NotImplementedError() return def _get_item(self, idx): start = idx * self.minibatch_size end = (idx + 1) * self.minibatch_size sample_idx = self._idx_buf[start:end] input_dict = {} for k,v in self.values_dict.items(): if k not in self.special_names and v is not None: input_dict[k] = v[sample_idx] if (end >= self.batch_size): self._shuffle_idx_buf() return input_dict def _shuffle_idx_buf(self): self._idx_buf[:] = torch.randperm(self.batch_size) return ================================================ FILE: timechamber/ase/utils/amp_models.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import torch.nn as nn from rl_games.algos_torch.models import ModelA2CContinuousLogStd class ModelAMPContinuous(ModelA2CContinuousLogStd): def __init__(self, network): super().__init__(network) return def build(self, config): net = self.network_builder.build('amp', **config) for name, _ in net.named_parameters(): print(name) # print(f"AMP config: {config}") obs_shape = config['input_shape'] normalize_value = config.get('normalize_value', False) normalize_input = config.get('normalize_input', False) value_size = config.get('value_size', 1) return ModelAMPContinuous.Network(net, obs_shape=obs_shape, normalize_value=normalize_value, normalize_input=normalize_input, value_size=value_size) class Network(ModelA2CContinuousLogStd.Network): def __init__(self, a2c_network, obs_shape, normalize_value, normalize_input, value_size): super().__init__(a2c_network, obs_shape=obs_shape, normalize_value=normalize_value, normalize_input=normalize_input, value_size=value_size) return def forward(self, input_dict): is_train = input_dict.get('is_train', True) result = super().forward(input_dict) if (is_train): amp_obs = input_dict['amp_obs'] disc_agent_logit = self.a2c_network.eval_disc(amp_obs) result["disc_agent_logit"] = disc_agent_logit amp_obs_replay = input_dict['amp_obs_replay'] disc_agent_replay_logit = self.a2c_network.eval_disc(amp_obs_replay) result["disc_agent_replay_logit"] = disc_agent_replay_logit amp_demo_obs = input_dict['amp_obs_demo'] disc_demo_logit 
= self.a2c_network.eval_disc(amp_demo_obs) result["disc_demo_logit"] = disc_demo_logit return result def eval_actor(self, obs): processed_obs = self.norm_obs(obs) mu, sigma = self.a2c_network.eval_actor(obs=processed_obs) return mu, sigma def eval_critic(self, obs): processed_obs = self.norm_obs(obs) value = self.a2c_network.eval_critic(processed_obs) return value ================================================ FILE: timechamber/ase/utils/amp_network_builder.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import layers
from rl_games.algos_torch import network_builder

import torch
import torch.nn as nn
import numpy as np

# Range of the uniform init for the discriminator output layer weights.
DISC_LOGIT_INIT_SCALE = 1.0


class AMPBuilder(network_builder.A2CBuilder):
    """A2C network builder extended with an AMP discriminator MLP head."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        return

    class Network(network_builder.A2CBuilder.Network):
        def __init__(self, params, **kwargs):
            super().__init__(params, **kwargs)

            # With learn_sigma disabled, replace sigma with a frozen
            # (non-trainable) parameter initialized from config.
            if self.is_continuous:
                if (not self.space_config['learn_sigma']):
                    actions_num = kwargs.get('actions_num')
                    sigma_init = self.init_factory.create(**self.space_config['sigma_init'])
                    self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32),
                                              requires_grad=False)
                    sigma_init(self.sigma)

            amp_input_shape = kwargs.get('amp_input_shape')
            self._build_disc(amp_input_shape)

            return

        def load(self, params):
            """Read discriminator architecture settings from params."""
            super().load(params)

            self._disc_units = params['disc']['units']
            self._disc_activation = params['disc']['activation']
            self._disc_initializer = params['disc']['initializer']
            return

        def forward(self, obs_dict):
            """Return (actor outputs..., value, rnn_states)."""
            obs = obs_dict['obs']
            states = obs_dict.get('rnn_states', None)

            actor_outputs = self.eval_actor(obs)
            value = self.eval_critic(obs)

            output = actor_outputs + (value, states)

            return output

        def eval_actor(self, obs):
            """Actor head: logits (discrete) or (mu, sigma) (continuous)."""
            a_out = self.actor_cnn(obs)
            a_out = a_out.contiguous().view(a_out.size(0), -1)
            a_out = self.actor_mlp(a_out)

            if self.is_discrete:
                logits = self.logits(a_out)
                return logits

            if self.is_multi_discrete:
                logits = [logit(a_out) for logit in self.logits]
                return logits

            if self.is_continuous:
                mu = self.mu_act(self.mu(a_out))
                if self.space_config['fixed_sigma']:
                    # Broadcast the shared sigma parameter to mu's batch shape.
                    sigma = mu * 0.0 + self.sigma_act(self.sigma)
                else:
                    sigma = self.sigma_act(self.sigma(a_out))

                return mu, sigma
            return

        def eval_critic(self, obs):
            """Critic head: state value."""
            c_out = self.critic_cnn(obs)
            c_out = c_out.contiguous().view(c_out.size(0), -1)
            c_out = self.critic_mlp(c_out)
            value = self.value_act(self.value(c_out))
            return value

        def eval_disc(self, amp_obs):
            """Discriminator head: raw logits for AMP observations."""
            disc_mlp_out = self._disc_mlp(amp_obs)
            disc_logits = self._disc_logits(disc_mlp_out)
            return disc_logits

        def get_disc_logit_weights(self):
            # Output-layer weights only (used for logit regularization).
            return torch.flatten(self._disc_logits.weight)

        def get_disc_weights(self):
            """All discriminator Linear weights (for weight decay)."""
            weights = []
            for m in self._disc_mlp.modules():
                if isinstance(m, nn.Linear):
                    weights.append(torch.flatten(m.weight))

            weights.append(torch.flatten(self._disc_logits.weight))
            return weights

        def _build_disc(self, input_shape):
            """Construct the discriminator MLP + 1-logit output layer."""
            # NOTE(review): dead assignment — immediately overwritten by
            # _build_mlp below.
            self._disc_mlp = nn.Sequential()

            mlp_args = {
                'input_size' : input_shape[0],
                'units' : self._disc_units,
                'activation' : self._disc_activation,
                'dense_func' : torch.nn.Linear
            }
            self._disc_mlp = self._build_mlp(**mlp_args)

            mlp_out_size = self._disc_units[-1]
            self._disc_logits = torch.nn.Linear(mlp_out_size, 1)

            mlp_init = self.init_factory.create(**self._disc_initializer)
            for m in self._disc_mlp.modules():
                if isinstance(m, nn.Linear):
                    mlp_init(m.weight)
                    if getattr(m, "bias", None) is not None:
                        torch.nn.init.zeros_(m.bias)

            torch.nn.init.uniform_(self._disc_logits.weight, -DISC_LOGIT_INIT_SCALE, DISC_LOGIT_INIT_SCALE)
            torch.nn.init.zeros_(self._disc_logits.bias)

            return

    def build(self, name, **kwargs):
        net = AMPBuilder.Network(self.params, **kwargs)
        return net


================================================
FILE: timechamber/ase/utils/amp_players.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import torch

from rl_games.algos_torch import torch_ext
from rl_games.algos_torch.running_mean_std import RunningMeanStd

import timechamber.ase.utils.common_player as common_player


class AMPPlayerContinuous(common_player.CommonPlayer):
    """Inference-time player for AMP policies.

    Adds discriminator evaluation, AMP-observation normalization, and
    style-reward computation on top of the generic CommonPlayer.
    """

    def __init__(self, params):
        config = params['config']

        # Whether AMP observations go through a running mean/std before the discriminator.
        self._normalize_amp_input = config.get('normalize_amp_input', True)
        self._disc_reward_scale = config['disc_reward_scale']

        super().__init__(params)
        return

    def restore(self, fn):
        # 'Base' means "no checkpoint": keep the freshly-built network.
        if (fn != 'Base'):
            super().restore(fn)
            if self._normalize_amp_input:
                checkpoint = torch_ext.load_checkpoint(fn)
                self._amp_input_mean_std.load_state_dict(checkpoint['amp_input_mean_std'])
        return

    def _build_net(self, config):
        super()._build_net(config)

        if self._normalize_amp_input:
            self._amp_input_mean_std = RunningMeanStd(config['amp_input_shape']).to(self.device)
            self._amp_input_mean_std.eval()

        return

    def _post_step(self, info):
        super()._post_step(info)
        # Only print discriminator diagnostics when a viewer window is attached.
        if (self.env.task.viewer):
            self._amp_debug(info)
        return

    def _build_net_config(self):
        config = super()._build_net_config()
        if (hasattr(self, 'env')) and self.env is not None:
            config['amp_input_shape'] = self.env.amp_observation_space.shape
        else:
            config['amp_input_shape'] = self.env_info['amp_observation_space']

        return config

    def _amp_debug(self, info):
        """Print discriminator prediction and reward for env 0 (viewer-only debug aid)."""
        with torch.no_grad():
            amp_obs = info['amp_obs']
            amp_obs = amp_obs[0:1]
            disc_pred = self._eval_disc(amp_obs)
            amp_rewards = self._calc_amp_rewards(amp_obs)
            disc_reward = amp_rewards['disc_rewards']

            disc_pred = disc_pred.detach().cpu().numpy()[0, 0]
            disc_reward = disc_reward.cpu().numpy()[0, 0]
            print("disc_pred: ", disc_pred, disc_reward)
        return

    def _preproc_amp_obs(self, amp_obs):
        if self._normalize_amp_input:
            amp_obs = self._amp_input_mean_std(amp_obs)
        return amp_obs

    def _eval_disc(self, amp_obs):
        proc_amp_obs = self._preproc_amp_obs(amp_obs)
        return self.model.a2c_network.eval_disc(proc_amp_obs)

    def _calc_amp_rewards(self, amp_obs):
        disc_r = self._calc_disc_rewards(amp_obs)
        output = {
            'disc_rewards': disc_r
        }
        return output

    def _calc_disc_rewards(self, amp_obs):
        """Style reward: -log(max(1 - sigmoid(D(amp_obs)), 1e-4)) * disc_reward_scale."""
        with torch.no_grad():
            disc_logits = self._eval_disc(amp_obs)
            prob = 1 / (1 + torch.exp(-disc_logits))
            # Clamp away from zero so the log stays finite when the disc saturates.
            disc_r = -torch.log(torch.maximum(1 - prob, torch.tensor(0.0001, device=self.device)))
            disc_r *= self._disc_reward_scale
        return disc_r

================================================ FILE: timechamber/ase/utils/common_agent.py ================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import copy
from datetime import datetime
from gym import spaces
import numpy as np
import os
import time
import yaml

from rl_games.algos_torch import a2c_continuous
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import central_value
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common import a2c_common
from rl_games.common import datasets
from rl_games.common import schedulers
from rl_games.common import vecenv

import torch
from torch import optim

import timechamber.ase.utils.amp_datasets as amp_datasets
from timechamber.utils.utils import load_check, load_checkpoint

from tensorboardX import SummaryWriter


class CommonAgent(a2c_continuous.A2CAgent):
    """Shared PPO training loop for the ASE/AMP agents in this package."""

    def __init__(self, base_name, params):
        # Deliberately skip A2CAgent.__init__ and call A2CBase directly: this class
        # rebuilds the model, optimizer, and dataset itself with AMP-specific pieces.
        a2c_common.A2CBase.__init__(self, base_name, params)

        self.config = config = params['config']
        self._load_config_params(config)

        self.is_discrete = False
        self._setup_action_space()
        self.bounds_loss_coef = config.get('bounds_loss_coef', None)
        self.clip_actions = config.get('clip_actions', True)
        self._save_intermediate = config.get('save_intermediate', False)

        net_config = self._build_net_config()
        self.model = self.network.build(net_config)
        self.model.to(self.ppo_device)
        self.states = None

        self.init_rnn_from_model(self.model)

        self.last_lr = float(self.last_lr)

        self.optimizer = optim.Adam(self.model.parameters(), float(self.last_lr), eps=1e-08, weight_decay=self.weight_decay)

        if self.normalize_input:
            obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
            self.running_mean_std = RunningMeanStd(obs_shape).to(self.ppo_device)

        if self.normalize_value:
            self.value_mean_std = self.central_value_net.model.value_mean_std if self.has_central_value else self.model.value_mean_std

        if self.has_central_value:
            cv_config = {
                'state_shape' : torch_ext.shape_whc_to_cwh(self.state_shape),
                'value_size' : self.value_size,
                'ppo_device' : self.ppo_device,
                'num_agents' : self.num_agents,
                'horizon_length' : self.horizon_length,
                'num_actors' : self.num_actors,
                'num_actions' : self.actions_num,
                'seq_len' : self.seq_len,
                'model' : self.central_value_config['network'],
                'config' : self.central_value_config,
                # NOTE: 'writter' is the (misspelled) key the rl_games central-value
                # trainer expects -- do not "fix" the spelling.
                'writter' : self.writer,
                'multi_gpu' : self.multi_gpu
            }
            self.central_value_net = central_value.CentralValueTrain(**cv_config).to(self.ppo_device)
            self.use_experimental_cv = self.config.get('use_experimental_cv', True)

        self.dataset = amp_datasets.AMPDataset(self.batch_size, self.minibatch_size, self.is_discrete, self.is_rnn, self.ppo_device, self.seq_len)

        self.algo_observer.after_init(self)

        return

    def init_tensors(self):
        super().init_tensors()
        # Extra rollout storage for bootstrapping with explicit next-state values.
        self.experience_buffer.tensor_dict['next_obses'] = torch.zeros_like(self.experience_buffer.tensor_dict['obses'])
        self.experience_buffer.tensor_dict['next_values'] = torch.zeros_like(self.experience_buffer.tensor_dict['values'])

        self.tensor_list += ['next_obses']
        return

    def train(self):
        """Main training loop: alternate rollout collection and PPO updates until max_epochs."""
        self.init_tensors()
        self.last_mean_rewards = -100500
        start_time = time.time()
        total_time = 0
        rep_count = 0
        self.frame = 0
        self.obs = self.env_reset()
        self.curr_frames = self.batch_size_envs

        model_output_file = os.path.join(self.nn_dir, self.config['name'])

        if self.multi_gpu:
            self.hvd.setup_algo(self)

        self._init_train()

        while True:
            epoch_num = self.update_epoch()
            train_info = self.train_epoch()

            sum_time = train_info['total_time']
            total_time += sum_time
            frame = self.frame
            if self.multi_gpu:
                self.hvd.sync_stats(self)

            # Logging / checkpointing happens on rank 0 only.
            if self.rank == 0:
                scaled_time = sum_time
                scaled_play_time = train_info['play_time']
                curr_frames = self.curr_frames
                self.frame += curr_frames
                if self.print_stats:
                    fps_step = curr_frames / scaled_play_time
                    fps_total = curr_frames / scaled_time
                    print(f'fps step: {fps_step:.1f} fps total: {fps_total:.1f}')

                self.writer.add_scalar('performance/total_fps', curr_frames / scaled_time, frame)
                self.writer.add_scalar('performance/step_fps', curr_frames / scaled_play_time, frame)
                self.writer.add_scalar('info/epochs', epoch_num, frame)
                self._log_train_info(train_info, frame)
self.algo_observer.after_print_stats(frame, epoch_num, total_time) if self.game_rewards.current_size > 0: mean_rewards = self._get_mean_rewards() mean_lengths = self.game_lengths.get_mean() for i in range(self.value_size): self.writer.add_scalar('rewards{0}/frame'.format(i), mean_rewards[i], frame) self.writer.add_scalar('rewards{0}/iter'.format(i), mean_rewards[i], epoch_num) self.writer.add_scalar('rewards{0}/time'.format(i), mean_rewards[i], total_time) self.writer.add_scalar('episode_lengths/frame', mean_lengths, frame) self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num) if self.has_self_play_config: self.self_play_manager.update(self) if self.save_freq > 0: if (epoch_num % self.save_freq == 0): self.save(model_output_file) if (self._save_intermediate): int_model_output_file = model_output_file + '_' + str(epoch_num).zfill(8) self.save(int_model_output_file) if epoch_num > self.max_epochs: self.save(model_output_file) print('MAX EPOCHS NUM!') return self.last_mean_rewards, epoch_num update_time = 0 return def set_full_state_weights(self, weights): self.set_weights(weights) self.epoch_num = weights['epoch'] if self.has_central_value: self.central_value_net.load_state_dict(weights['assymetric_vf_nets']) self.optimizer.load_state_dict(weights['optimizer']) self.frame = weights.get('frame', 0) self.last_mean_rewards = weights.get('last_mean_rewards', -100500) if self.vec_env is not None: env_state = weights.get('env_state', None) self.vec_env.set_env_state(env_state) return def restore(self, fn): checkpoint = load_checkpoint(fn, device=self.device) checkpoint = load_check(checkpoint=checkpoint, normalize_input=self.normalize_input, normalize_value=self.normalize_value) self.set_full_state_weights(checkpoint) def train_epoch(self): play_time_start = time.time() with torch.no_grad(): if self.is_rnn: batch_dict = self.play_steps_rnn() else: batch_dict = self.play_steps() play_time_end = time.time() update_time_start = time.time() rnn_masks = 
batch_dict.get('rnn_masks', None) self.set_train() self.curr_frames = batch_dict.pop('played_frames') self.prepare_dataset(batch_dict) self.algo_observer.after_steps() if self.has_central_value: self.train_central_value() train_info = None if self.is_rnn: frames_mask_ratio = rnn_masks.sum().item() / (rnn_masks.nelement()) print(frames_mask_ratio) for _ in range(0, self.mini_epochs_num): ep_kls = [] for i in range(len(self.dataset)): curr_train_info = self.train_actor_critic(self.dataset[i]) if self.schedule_type == 'legacy': if self.multi_gpu: curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls') self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, curr_train_info['kl'].item()) self.update_lr(self.last_lr) if (train_info is None): train_info = dict() for k, v in curr_train_info.items(): train_info[k] = [v] else: for k, v in curr_train_info.items(): train_info[k].append(v) av_kls = torch_ext.mean_list(train_info['kl']) if self.schedule_type == 'standard': if self.multi_gpu: av_kls = self.hvd.average_value(av_kls, 'ep_kls') self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item()) self.update_lr(self.last_lr) if self.schedule_type == 'standard_epoch': if self.multi_gpu: av_kls = self.hvd.average_value(torch_ext.mean_list(kls), 'ep_kls') self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item()) self.update_lr(self.last_lr) update_time_end = time.time() play_time = play_time_end - play_time_start update_time = update_time_end - update_time_start total_time = update_time_end - play_time_start train_info['step_time'] = batch_dict['step_time'] train_info['play_time'] = play_time train_info['update_time'] = update_time train_info['total_time'] = total_time self._record_train_batch_info(batch_dict, train_info) return train_info def play_steps(self): self.set_eval() 
# --- body of CommonAgent.play_steps() (method header above this chunk) ---
        epinfos = []
        done_indices = []
        update_list = self.update_list

        for n in range(self.horizon_length):
            # Reset only the envs that finished on the previous step.
            self.obs = self.env_reset(done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])

            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                res_dict = self.get_action_values(self.obs)

            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])

            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])

            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
            shaped_rewards = self.rewards_shaper(rewards)
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)

            # Zero the bootstrap value for true terminations (vs. time-outs) so
            # GAE does not bootstrap past the end of an episode.
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            next_vals = self._eval_critic(self.obs)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)

            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            # Keep one index per environment (stride over per-env agents).
            done_indices = all_done_indices[::self.num_agents]

            self.game_rewards.update(self.current_rewards[done_indices])
            self.game_lengths.update(self.current_lengths[done_indices])
            self.algo_observer.process_infos(infos, done_indices)

            not_dones = 1.0 - self.dones.float()

            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones

            done_indices = done_indices[:, 0]

        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']

        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values

        batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size

        return batch_dict

    def prepare_dataset(self, batch_dict):
        """Normalize values/returns, compute advantages, and load the rollout into the PPO dataset."""
        obses = batch_dict['obses']
        returns = batch_dict['returns']
        dones = batch_dict['dones']
        values = batch_dict['values']
        actions = batch_dict['actions']
        neglogpacs = batch_dict['neglogpacs']
        mus = batch_dict['mus']
        sigmas = batch_dict['sigmas']
        rnn_states = batch_dict.get('rnn_states', None)
        rnn_masks = batch_dict.get('rnn_masks', None)

        advantages = self._calc_advs(batch_dict)

        if self.normalize_value:
            # Update running stats on this batch, then freeze for the PPO passes.
            self.value_mean_std.train()
            values = self.value_mean_std(values)
            returns = self.value_mean_std(returns)
            self.value_mean_std.eval()

        dataset_dict = {}
        dataset_dict['old_values'] = values
        dataset_dict['old_logp_actions'] = neglogpacs
        dataset_dict['advantages'] = advantages
        dataset_dict['returns'] = returns
        dataset_dict['actions'] = actions
        dataset_dict['obs'] = obses
        dataset_dict['rnn_states'] = rnn_states
        dataset_dict['rnn_masks'] = rnn_masks
        dataset_dict['mu'] = mus
        dataset_dict['sigma'] = sigmas

        self.dataset.update_values_dict(dataset_dict)

        if self.has_central_value:
            dataset_dict = {}
            dataset_dict['old_values'] = values
            dataset_dict['advantages'] = advantages
            dataset_dict['returns'] = returns
            dataset_dict['actions'] = actions
            dataset_dict['obs'] = batch_dict['states']
            dataset_dict['rnn_masks'] = rnn_masks
            self.central_value_net.update_dataset(dataset_dict)

        return

    def calc_gradients(self, input_dict):
        """One PPO minibatch update: forward pass, losses, backward, optimizer step."""
        self.set_train()

        value_preds_batch = input_dict['old_values']
        old_action_log_probs_batch = input_dict['old_logp_actions']
        advantage = input_dict['advantages']
        old_mu_batch = input_dict['mu']
        old_sigma_batch = input_dict['sigma']
        return_batch = input_dict['returns']
        actions_batch = input_dict['actions']
        obs_batch = input_dict['obs']
        obs_batch = self._preproc_obs(obs_batch)

        lr = self.last_lr
        kl = 1.0
        lr_mul = 1.0
# --- continuation of CommonAgent.calc_gradients() (method header above this chunk) ---
        curr_e_clip = lr_mul * self.e_clip
        batch_dict = {
            'is_train': True,
            'prev_actions': actions_batch,
            'obs' : obs_batch
        }

        rnn_masks = None
        if self.is_rnn:
            rnn_masks = input_dict['rnn_masks']
            batch_dict['rnn_states'] = input_dict['rnn_states']
            batch_dict['seq_length'] = self.seq_len

        with torch.cuda.amp.autocast(enabled=self.mixed_precision):
            res_dict = self.model(batch_dict)
            action_log_probs = res_dict['prev_neglogp']
            values = res_dict['values']
            entropy = res_dict['entropy']
            mu = res_dict['mus']
            sigma = res_dict['sigmas']

            a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip)
            a_loss = a_info['actor_loss']

            c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value)
            c_loss = c_info['critic_loss']

            b_loss = self.bound_loss(mu)

            a_loss = torch.mean(a_loss)
            c_loss = torch.mean(c_loss)
            b_loss = torch.mean(b_loss)
            entropy = torch.mean(entropy)

            # Combined PPO objective: surrogate + value + entropy bonus + action-bound penalty.
            loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss

            a_clip_frac = torch.mean(a_info['actor_clipped'].float())

            a_info['actor_loss'] = a_loss
            a_info['actor_clip_frac'] = a_clip_frac

        if self.multi_gpu:
            self.optimizer.zero_grad()
        else:
            # Setting grads to None is cheaper than zeroing them.
            for param in self.model.parameters():
                param.grad = None

        self.scaler.scale(loss).backward()
        self.scaler.step(self.optimizer)
        self.scaler.update()

        with torch.no_grad():
            reduce_kl = not self.is_rnn
            kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl)

        self.train_result = {
            'entropy': entropy,
            'kl': kl_dist,
            'last_lr': self.last_lr,
            'lr_mul': lr_mul,
            'b_loss': b_loss
        }
        self.train_result.update(a_info)
        self.train_result.update(c_info)

        return

    def discount_values(self, mb_fdones, mb_values, mb_rewards, mb_next_values):
        """GAE using explicit next-state values; self.tau is the GAE lambda."""
        lastgaelam = 0
        mb_advs = torch.zeros_like(mb_rewards)

        for t in reversed(range(self.horizon_length)):
            not_done = 1.0 - mb_fdones[t]
            not_done = not_done.unsqueeze(1)

            delta = mb_rewards[t] + self.gamma * mb_next_values[t] - mb_values[t]
            lastgaelam = delta + self.gamma * self.tau * not_done * lastgaelam
            mb_advs[t] = lastgaelam

        return mb_advs

    def env_reset(self, env_ids=None):
        obs = self.vec_env.reset(env_ids)
        obs = self.obs_to_tensors(obs)
        return obs

    def bound_loss(self, mu):
        """Quadratic penalty for action means outside [-soft_bound, soft_bound]."""
        if self.bounds_loss_coef is not None:
            soft_bound = 1.0
            mu_loss_high = torch.clamp_min(mu - soft_bound, 0.0)**2
            mu_loss_low = torch.clamp_max(mu + soft_bound, 0.0)**2
            b_loss = (mu_loss_low + mu_loss_high).sum(axis=-1)
        else:
            b_loss = 0
        return b_loss

    def _get_mean_rewards(self):
        return self.game_rewards.get_mean()

    def _load_config_params(self, config):
        self.last_lr = config['learning_rate']
        return

    def _build_net_config(self):
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        config = {
            'actions_num' : self.actions_num,
            'input_shape' : obs_shape,
            'num_seqs' : self.num_actors * self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value' : self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        return config

    def _setup_action_space(self):
        action_space = self.env_info['action_space']
        self.actions_num = action_space.shape[0]

        # todo introduce device instead of cuda()
        self.actions_low = torch.from_numpy(action_space.low.copy()).float().to(self.ppo_device)
        self.actions_high = torch.from_numpy(action_space.high.copy()).float().to(self.ppo_device)
        return

    def _init_train(self):
        # Hook for subclasses; base class does nothing before the training loop.
        return

    def _eval_critic(self, obs_dict):
        """Value estimate for the current observation (eval mode, no grad tracking by caller)."""
        self.model.eval()
        obs = obs_dict['obs']
        processed_obs = self._preproc_obs(obs)
        value = self.model.eval_critic(processed_obs)
        return value

    def _actor_loss(self, old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip):
        """Clipped PPO surrogate loss plus clip-fraction diagnostics."""
        ratio = torch.exp(old_action_log_probs_batch - action_log_probs)
        surr1 = advantage * ratio
        surr2 = advantage * torch.clamp(ratio, 1.0 - curr_e_clip, 1.0 + curr_e_clip)
        a_loss = torch.max(-surr1, -surr2)

        clipped = torch.abs(ratio - 1.0) > curr_e_clip
        clipped = clipped.detach()

        info = {
            'actor_loss': a_loss,
            'actor_clipped': clipped.detach()
        }
        return info

    def _critic_loss(self, value_preds_batch, values, curr_e_clip, return_batch, clip_value):
        """Squared-error value loss, optionally clipped around the old value prediction."""
        if clip_value:
            value_pred_clipped = value_preds_batch + \
                (values - value_preds_batch).clamp(-curr_e_clip, curr_e_clip)
            value_losses = (values - return_batch)**2
            value_losses_clipped = (value_pred_clipped - return_batch)**2
            c_loss = torch.max(value_losses, value_losses_clipped)
        else:
            c_loss = (return_batch - values)**2

        info = {
            'critic_loss': c_loss
        }
        return info

    def _calc_advs(self, batch_dict):
        """Advantages = returns - values, summed over value heads and (optionally) normalized."""
        returns = batch_dict['returns']
        values = batch_dict['values']

        advantages = returns - values
        advantages = torch.sum(advantages, axis=1)
        if self.normalize_advantage:
            advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
        return advantages

    def _record_train_batch_info(self, batch_dict, train_info):
        # Hook for subclasses (e.g. to record AMP-specific batch statistics).
        return

    def _log_train_info(self, train_info, frame):
        """Write per-epoch training scalars to TensorBoard."""
        self.writer.add_scalar('performance/update_time', train_info['update_time'], frame)
        self.writer.add_scalar('performance/play_time', train_info['play_time'], frame)
        self.writer.add_scalar('losses/a_loss', torch_ext.mean_list(train_info['actor_loss']).item(), frame)
        self.writer.add_scalar('losses/c_loss', torch_ext.mean_list(train_info['critic_loss']).item(), frame)
        self.writer.add_scalar('losses/bounds_loss', torch_ext.mean_list(train_info['b_loss']).item(), frame)
        self.writer.add_scalar('losses/entropy', torch_ext.mean_list(train_info['entropy']).item(), frame)
        self.writer.add_scalar('info/last_lr', train_info['last_lr'][-1] * train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/lr_mul', train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/e_clip', self.e_clip * train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/clip_frac', torch_ext.mean_list(train_info['actor_clip_frac']).item(), frame)
        self.writer.add_scalar('info/kl', torch_ext.mean_list(train_info['kl']).item(), frame)
        return

================================================ FILE: timechamber/ase/utils/common_player.py
================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import time  # BUGFIX: run() calls time.sleep() when rendering, but 'time' was never imported

import torch

from rl_games.algos_torch import players
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common.player import BasePlayer

from timechamber.utils.utils import load_check, load_checkpoint

import numpy as np


class CommonPlayer(players.PpoPlayerContinuous):
    """Generic evaluation player: builds its own network and runs inference episodes."""

    def __init__(self, params):
        config = params['config']
        # Deliberately skip PpoPlayerContinuous.__init__: this class builds its
        # own network below via _build_net_config()/_build_net().
        BasePlayer.__init__(self, params)
        self.network = config['network']

        self._setup_action_space()
        self.mask = [False]

        self.normalize_input = self.config['normalize_input']
        self.normalize_value = self.config.get('normalize_value', False)

        net_config = self._build_net_config()
        self._build_net(net_config)

        return

    def run(self):
        """Play n_games evaluation games and print per-game and aggregate stats."""
        n_games = self.games_num
        render = self.render_env
        n_game_life = self.n_game_life
        is_determenistic = self.is_determenistic
        sum_rewards = 0
        sum_steps = 0
        sum_game_res = 0
        n_games = n_games * n_game_life
        games_played = 0
        has_masks = False
        has_masks_func = getattr(self.env, "has_action_mask", None) is not None
        # (dead `op_agent`/`agent_inited` locals from upstream removed -- they were never read)

        if has_masks_func:
            has_masks = self.env.has_action_mask()

        need_init_rnn = self.is_rnn
        # BUGFIX: initialize before the loop; previously this was first assigned
        # inside the loop body and could be referenced unbound after it.
        print_game_res = False
        for _ in range(n_games):
            if games_played >= n_games:
                break

            obs_dict = self.env_reset()
            batch_size = 1
            batch_size = self.get_batch_size(obs_dict['obs'], batch_size)

            if need_init_rnn:
                self.init_rnn()
                need_init_rnn = False

            cr = torch.zeros(batch_size, dtype=torch.float32, device=self.device)
            steps = torch.zeros(batch_size, dtype=torch.float32, device=self.device)

            print_game_res = False

            done_indices = []

            for n in range(self.max_steps):
                # obs_dict = self.env_reset(done_indices)
                if has_masks:
                    masks = self.env.get_action_mask()
                    action = self.get_masked_action(obs_dict, masks, is_determenistic)
                else:
                    action = self.get_action(obs_dict, is_determenistic)
                obs_dict, r, done, info = self.env_step(self.env, action)
                obs_dict = {'obs': obs_dict}
                cr += r
                steps += 1

                self._post_step(info)

                if render:
                    self.env.render(mode='human')
                    time.sleep(self.render_sleep)

                all_done_indices = done.nonzero(as_tuple=False)
                # One index per environment (stride over per-env agents).
                done_indices = all_done_indices[::self.num_agents]
                done_count = len(done_indices)
                games_played += done_count

                if done_count > 0:
                    if self.is_rnn:
                        # Clear recurrent state of finished environments.
                        for s in self.states:
                            s[:, all_done_indices, :] = s[:, all_done_indices, :] * 0.0

                    cur_rewards = cr[done_indices].sum().item()
                    cur_steps = steps[done_indices].sum().item()

                    cr = cr * (1.0 - done.float())
                    steps = steps * (1.0 - done.float())
                    sum_rewards += cur_rewards
                    sum_steps += cur_steps

                    game_res = 0.0
                    if isinstance(info, dict):
                        if 'battle_won' in info:
                            print_game_res = True
                            game_res = info.get('battle_won', 0.5)
                        if 'scores' in info:
                            print_game_res = True
                            game_res = info.get('scores', 0.5)
                    if self.print_stats:
                        if print_game_res:
                            print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count, 'w:', game_res)
                        else:
                            print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count)

                    sum_game_res += game_res
                    if batch_size // self.num_agents == 1 or games_played >= n_games:
                        break

                done_indices = done_indices[:, 0]

        print(sum_rewards)
        # NOTE(review): games_played can be 0 if no game finished within
        # max_steps, which would make the averages below divide by zero.
        if print_game_res:
            print('av reward:', sum_rewards / games_played * n_game_life,
                  'av steps:', sum_steps / games_played * n_game_life,
                  'winrate:', sum_game_res / games_played * n_game_life)
        else:
            print('av reward:', sum_rewards / games_played * n_game_life,
                  'av steps:', sum_steps / games_played * n_game_life)

        return

    def get_action(self, obs_dict, is_determenistic=False):
        """Unwrap the obs dict and delegate to the base player's get_action."""
        output = super().get_action(obs_dict['obs'], is_determenistic)
        return output

    def env_step(self, env, actions):
        """Step the env, coercing numpy/scalar results to torch tensors on self.device."""
        if not self.is_tensor_obses:
            actions = actions.cpu().numpy()
        obs, rewards, dones, infos = env.step(actions)

        if hasattr(obs, 'dtype') and obs.dtype == np.float64:
            obs = np.float32(obs)
        if self.value_size > 1:
            rewards = rewards[0]
        if self.is_tensor_obses:
            return obs, rewards.to(self.device), dones.to(self.device), infos
        else:
            if np.isscalar(dones):
                rewards = np.expand_dims(np.asarray(rewards), 0)
                dones = np.expand_dims(np.asarray(dones), 0)
            return self.obs_to_torch(obs), torch.from_numpy(rewards), torch.from_numpy(dones), infos

    def _build_net(self, config):
        """Build the inference network and (optionally) the input normalizer."""
        self.model = self.network.build(config)
        self.model.to(self.device)
        self.model.eval()
        self.is_rnn = self.model.is_rnn()
        if self.normalize_input:
            obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
            self.running_mean_std = RunningMeanStd(obs_shape).to(self.device)
            self.running_mean_std.eval()
        return

    def env_reset(self, env_ids=None):
        obs = self.env.reset(env_ids)
        return self.obs_to_torch(obs)

    def _post_step(self, info):
        # Hook for subclasses (e.g. AMP discriminator debugging).
        return

    def _build_net_config(self):
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        config = {
            'actions_num' : self.actions_num,
            'input_shape' : obs_shape,
            'num_seqs' : self.num_agents,
            'normalize_input': self.normalize_input,
            'normalize_value' : self.normalize_value,
        }
        return config

    def restore(self, fn):
        """Load model weights (and input-normalizer state, if present) from a checkpoint."""
        checkpoint = load_checkpoint(fn, device=self.device)
        checkpoint = load_check(checkpoint=checkpoint, normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        self.model.load_state_dict(checkpoint['model'])
        if self.normalize_input and 'running_mean_std' in checkpoint:
            self.model.running_mean_std.load_state_dict(checkpoint['running_mean_std'])

    def _setup_action_space(self):
        self.actions_num = self.action_space.shape[0]
        self.actions_low = torch.from_numpy(self.action_space.low.copy()).float().to(self.device)
        self.actions_high = torch.from_numpy(self.action_space.high.copy()).float().to(self.device)
        return

# ================================================
# FILE: timechamber/ase/utils/replay_buffer.py
# ================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1.
Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import torch class ReplayBuffer(): def __init__(self, buffer_size, device): self._head = 0 self._total_count = 0 self._buffer_size = buffer_size self._device = device self._data_buf = None self._sample_idx = torch.randperm(buffer_size) self._sample_head = 0 return def reset(self): self._head = 0 self._total_count = 0 self._reset_sample_idx() return def get_buffer_size(self): return self._buffer_size def get_total_count(self): return self._total_count def store(self, data_dict): if (self._data_buf is None): self._init_data_buf(data_dict) n = next(iter(data_dict.values())).shape[0] buffer_size = self.get_buffer_size() assert(n <= buffer_size) for key, curr_buf in self._data_buf.items(): curr_n = data_dict[key].shape[0] assert(n == curr_n) store_n = min(curr_n, buffer_size - self._head) curr_buf[self._head:(self._head + store_n)] = data_dict[key][:store_n] remainder = n - store_n if (remainder > 0): curr_buf[0:remainder] = data_dict[key][store_n:] self._head = (self._head + n) % buffer_size self._total_count += n return def sample(self, n): total_count = self.get_total_count() buffer_size = self.get_buffer_size() idx = torch.arange(self._sample_head, self._sample_head + n) idx = idx % buffer_size rand_idx = self._sample_idx[idx] if (total_count < buffer_size): rand_idx = rand_idx % self._head samples = dict() for k, v in self._data_buf.items(): samples[k] = v[rand_idx] self._sample_head += n if (self._sample_head >= buffer_size): self._reset_sample_idx() return samples def _reset_sample_idx(self): buffer_size = self.get_buffer_size() self._sample_idx[:] = torch.randperm(buffer_size) self._sample_head = 0 return def _init_data_buf(self, data_dict): buffer_size = self.get_buffer_size() self._data_buf = dict() for k, v in data_dict.items(): v_shape = v.shape[1:] self._data_buf[k] = torch.zeros((buffer_size,) + v_shape, device=self._device) return ================================================ FILE: timechamber/cfg/config.yaml 
================================================
# Top-level Hydra launch config: device/sim selection, checkpointing,
# self-play and logging options. The concrete task and train configs are
# composed in via the `defaults` list at the bottom of this file.

# Task name - used to pick the class to load
task_name: ${task.name}
# experiment name. defaults to name of training config
experiment: ''

# if set to positive integer, overrides the default number of environments
num_envs: ''

# seed - set to -1 to choose random seed
seed: 42
# set to True for deterministic performance
torch_deterministic: False

# set the maximum number of learning iterations to train for. overrides default per-environment setting
max_iterations: ''

# set minibatch_size
minibatch_size: 32768

## Device config
# 'physx' or 'flex'
physics_engine: 'physx'
# whether to use cpu or gpu pipeline
pipeline: 'gpu'
use_gpu: True
use_gpu_pipeline: True
# device for running physics simulation
sim_device: 'cuda:0'
# device to run RL
rl_device: 'cuda:0'
graphics_device_id: 0
device_type: cuda

## PhysX arguments
num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only.
solver_type: 1 # 0: pgs, 1: tgs
num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread

# RLGames Arguments
# test - if set, run policy in inference mode (requires setting checkpoint to load)
test: False
# used to set checkpoint path
checkpoint: ''
# checkpoint for the opponent policy; the train configs fall back to
# `checkpoint` when this is left empty (see op_load_path there)
op_checkpoint: ''
# opponent-pool variant forwarded to the self-play trainer via the train
# configs' player_pool_type; '' selects the trainer's default
player_pool_type: ''
# number of competing agents per environment (task configs read this as numAgents)
num_agents: 2

# HRL Arguments
motion_file: 'tasks/data/motions/reallusion_sword_shield/RL_Avatar_Idle_Ready_Motion.npy'

# set to True to use multi-gpu horovod training
multi_gpu: False

wandb_activate: False
wandb_group: ''
wandb_name: ${train.params.config.name}
wandb_entity: ''
wandb_project: 'timechamber'

capture_video: False
capture_video_freq: 1464
capture_video_len: 100
force_render: True

# disables rendering
headless: True

# set default task and default training config based on task
defaults:
  - task: MA_Humanoid_Strike
  - train: ${task}HRL
  - hydra/job_logging: disabled

# set the directory where the output files get saved
hydra:
  output_subdir: null
  run:
    dir: .
================================================ FILE: timechamber/cfg/task/MA_Ant_Battle.yaml ================================================ # used to create the object name: MA_Ant_Battle physics_engine: ${..physics_engine} # if given, will override the device setting in gym. env: # numEnvs: ${...num_envs} numEnvs: ${resolve_default:4096,${...num_envs}} numAgents: ${...num_agents} # rgb color of Ant body color: [ [ 0.97, 0.38, 0.06 ],[ 0.24, 0.38, 0.06 ],[ 0.56, 0.85, 0.25 ],[ 0.56, 0.85, 0.25 ],[ 0.14, 0.97, 0.24 ],[ 0.63, 0.2, 0.87 ] ] envSpacing: 6 borderlineSpace: 3 episodeLength: 1000 enableDebugVis: False controlFrequencyInv: 1 clipActions: 1.0 clipObservations: 5.0 actionScale: 0.5 control: # PD Drive parameters: stiffness: 85.0 # [N*m/rad] damping: 2.0 # [N*m*s/rad] actionScale: 0.5 controlFrequencyInv: 1 # 60 Hz # reward parameters headingWeight: 0.5 upWeight: 0.1 # cost parameters terminationHeight: 0.31 dofVelocityScale: 0.2 jointsAtLimitCost: -0.1 plane: staticFriction: 1.0 dynamicFriction: 1.0 restitution: 0.0 asset: assetFileName: "mjcf/nv_ant.xml" # set to True if you use camera sensors in the environment enableCameraSensors: False sim: dt: 0.0166 # 1/60 s substeps: 2 up_axis: "z" use_gpu_pipeline: ${eq:${...pipeline},"gpu"} gravity: [ 0.0, 0.0, -9.81 ] physx: num_threads: ${....num_threads} solver_type: ${....solver_type} use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU num_position_iterations: 4 num_velocity_iterations: 0 contact_offset: 0.02 rest_offset: 0.0 bounce_threshold_velocity: 0.2 max_depenetration_velocity: 10.0 default_buffer_size_multiplier: 5.0 max_gpu_contact_pairs: 8388608 # 8*1024*1024 num_subscenes: ${....num_subscenes} contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (default - all contacts) task: randomize: False randomization_params: # specify which attributes to randomize for each actor type and property 
frequency: 600 # Define how many environment steps between generating new randomizations observations: range: [ 0, .002 ] # range for the white noise operation: "additive" distribution: "gaussian" actions: range: [ 0., .02 ] operation: "additive" distribution: "gaussian" actor_params: ant: color: True rigid_body_properties: mass: range: [ 0.5, 1.5 ] operation: "scaling" distribution: "uniform" setup_only: True # Property will only be randomized once before simulation is started. See Domain Randomization Documentation for more info. dof_properties: damping: range: [ 0.5, 1.5 ] operation: "scaling" distribution: "uniform" stiffness: range: [ 0.5, 1.5 ] operation: "scaling" distribution: "uniform" lower: range: [ 0, 0.01 ] operation: "additive" distribution: "gaussian" upper: range: [ 0, 0.01 ] operation: "additive" distribution: "gaussian" ================================================ FILE: timechamber/cfg/task/MA_Ant_Sumo.yaml ================================================ # used to create the object name: MA_Ant_Sumo physics_engine: ${..physics_engine} # if given, will override the device setting in gym. 
env: # numEnvs: ${...num_envs} numEnvs: ${resolve_default:4096,${...num_envs}} numAgents: ${...num_agents} envSpacing: 6 borderlineSpace: 3 episodeLength: 1000 enableDebugVis: False controlFrequencyInv: 1 clipActions: 1.0 clipObservations: 5.0 actionScale: 0.5 control: # PD Drive parameters: stiffness: 85.0 # [N*m/rad] damping: 2.0 # [N*m*s/rad] actionScale: 0.5 controlFrequencyInv: 1 # 60 Hz # reward parameters headingWeight: 0.5 upWeight: 0.1 # cost parameters terminationHeight: 0.31 dofVelocityScale: 0.2 jointsAtLimitCost: -0.1 plane: staticFriction: 1.0 dynamicFriction: 1.0 restitution: 0.0 asset: assetFileName: "mjcf/nv_ant.xml" # set to True if you use camera sensors in the environment enableCameraSensors: False sim: dt: 0.0166 # 1/60 s substeps: 2 up_axis: "z" use_gpu_pipeline: ${eq:${...pipeline},"gpu"} gravity: [0.0, 0.0, -9.81] physx: num_threads: ${....num_threads} solver_type: ${....solver_type} use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU num_position_iterations: 4 num_velocity_iterations: 0 contact_offset: 0.02 rest_offset: 0.0 bounce_threshold_velocity: 0.2 max_depenetration_velocity: 10.0 default_buffer_size_multiplier: 5.0 max_gpu_contact_pairs: 8388608 # 8*1024*1024 num_subscenes: ${....num_subscenes} contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (default - all contacts) task: randomize: False randomization_params: # specify which attributes to randomize for each actor type and property frequency: 600 # Define how many environment steps between generating new randomizations observations: range: [0, .002] # range for the white noise operation: "additive" distribution: "gaussian" actions: range: [0., .02] operation: "additive" distribution: "gaussian" actor_params: ant: color: True rigid_body_properties: mass: range: [0.5, 1.5] operation: "scaling" distribution: "uniform" setup_only: True # Property will only be randomized 
once before simulation is started. See Domain Randomization Documentation for more info. dof_properties: damping: range: [0.5, 1.5] operation: "scaling" distribution: "uniform" stiffness: range: [0.5, 1.5] operation: "scaling" distribution: "uniform" lower: range: [0, 0.01] operation: "additive" distribution: "gaussian" upper: range: [0, 0.01] operation: "additive" distribution: "gaussian" ================================================ FILE: timechamber/cfg/task/MA_Humanoid_Strike.yaml ================================================ name: MA_Humanoid_Strike physics_engine: ${..physics_engine} # if given, will override the device setting in gym. env: numEnvs: ${resolve_default:4096,${...num_envs}} envSpacing: 6 episodeLength: 1500 borderlineSpace: 3.0 numAgents: 2 isFlagrun: False enableDebugVis: False pdControl: True powerScale: 1.0 controlFrequencyInv: 2 # 30 Hz stateInit: "Default" hybridInitProb: 0.5 numAMPObsSteps: 10 localRootObs: True keyBodies: ["right_hand", "left_hand", "right_foot", "left_foot", "sword", "shield"] contactBodies: ["right_foot", "left_foot"] # forceBodies: ["torso", "right_upper_arm", "right_thigh", "right_shin", "left_thigh", "left_shin"] forceBodies: ["torso", "right_thigh", "right_shin", "left_thigh", "left_shin"] terminationHeight: 0.15 enableEarlyTermination: True strikeBodyNames: ["sword", "shield", "right_hand", "right_lower_arm", "left_hand", "left_lower_arm"] enableTaskObs: True asset: assetRoot: "tasks/data/assets" assetFileName: "mjcf/amp_humanoid_sword_shield.xml" plane: staticFriction: 1.0 dynamicFriction: 1.0 restitution: 0.0 sim: substeps: 2 physx: num_threads: 4 solver_type: 1 # 0: pgs, 1: tgs num_position_iterations: 4 num_velocity_iterations: 0 contact_offset: 0.02 rest_offset: 0.0 bounce_threshold_velocity: 0.2 max_depenetration_velocity: 10.0 default_buffer_size_multiplier: 10.0 flex: num_inner_iterations: 10 warm_start: 0.25 ================================================ FILE: 
timechamber/cfg/train/MA_Ant_BattlePPO.yaml ================================================ params: seed: ${...seed} algo: name: self_play_continuous model: name: continuous_a2c_logstd network: name: actor_critic separate: False space: continuous: mu_activation: None sigma_activation: None mu_init: name: default sigma_init: name: const_initializer val: 0 fixed_sigma: True mlp: units: [ 256, 128, 64 ] activation: elu d2rl: False initializer: name: default player_pool_type: ${...player_pool_type} load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint load_path: ${...checkpoint} # path to the checkpoint to load op_load_path: ${if:${...op_checkpoint},${...op_checkpoint},${...checkpoint}} # default play with myself num_agents: ${...num_agents} update_win_rate: 0.7 player_pool_length: 4 games_to_check: 400 max_update_steps: 5000 device: ${...rl_device} config: name: ${resolve_default:MA_Ant_1v1,${....experiment}} env_name: rlgpu multi_gpu: ${....multi_gpu} ppo: True mixed_precision: False normalize_input: True normalize_value: True value_bootstrap: True num_actors: ${....task.env.numEnvs} reward_shaper: scale_value: 0.01 normalize_advantage: True gamma: 0.99 tau: 0.95 learning_rate: 3e-4 lr_schedule: adaptive schedule_type: standard kl_threshold: 0.008 score_to_win: 20000 max_epochs: ${resolve_default:2000,${....max_iterations}} save_best_after: 200 save_frequency: 1000 grad_norm: 1.0 entropy_coef: 0.0 truncate_grads: True e_clip: 0.2 horizon_length: 64 minibatch_size: ${resolve_default:32768,${....minibatch_size}} mini_epochs: 4 critic_coef: 2 clip_value: True use_smooth_clamp: True bounds_loss_coef: 0.0000 player: games_num: 4000 record_elo: True init_elo: 400 ================================================ FILE: timechamber/cfg/train/MA_Ant_SumoPPO.yaml ================================================ params: seed: ${...seed} algo: name: self_play_continuous model: name: continuous_a2c_logstd network: name: actor_critic 
separate: False space: continuous: mu_activation: None sigma_activation: None mu_init: name: default sigma_init: name: const_initializer val: 0 fixed_sigma: True mlp: units: [ 256, 128, 64 ] activation: elu d2rl: False initializer: name: default # self play agent related player_pool_type: ${...player_pool_type} load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint load_path: ${...checkpoint} # path to the checkpoint to load op_load_path: ${if:${...op_checkpoint},${...op_checkpoint},${...checkpoint}} # default play with myself num_agents: ${...num_agents} update_win_rate: 0.7 player_pool_length: 2 games_to_check: 400 max_update_steps: 5000 device: ${...rl_device} config: name: ${resolve_default:MA_Ant_1v1,${....experiment}} env_name: rlgpu multi_gpu: ${....multi_gpu} ppo: True mixed_precision: False normalize_input: True normalize_value: True value_bootstrap: True num_actors: ${....task.env.numEnvs} reward_shaper: scale_value: 0.01 normalize_advantage: True gamma: 0.99 tau: 0.95 learning_rate: 3e-4 lr_schedule: adaptive schedule_type: standard kl_threshold: 0.008 score_to_win: 20000 max_epochs: ${resolve_default:100000,${....max_iterations}} save_best_after: 200 save_frequency: 500 grad_norm: 1.0 entropy_coef: 0.0 truncate_grads: True e_clip: 0.2 horizon_length: 64 minibatch_size: ${resolve_default:32768,${....minibatch_size}} mini_epochs: 4 critic_coef: 2 clip_value: True use_smooth_clamp: True bounds_loss_coef: 0.0000 player: games_num: 4000 record_elo: True init_elo: 400 ================================================ FILE: timechamber/cfg/train/MA_Humanoid_StrikeHRL.yaml ================================================ params: seed: ${...seed} algo: name: self_play_hrl model: name: hrl network: name: hrl separate: True space: continuous: mu_activation: None sigma_activation: None mu_init: name: default sigma_init: name: const_initializer val: -2.3 fixed_sigma: True learn_sigma: False mlp: units: [1024, 512] 
activation: relu d2rl: False initializer: name: default regularizer: name: None # self play agent related player_pool_type: ${...player_pool_type} load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint load_path: ${...checkpoint} # path to the checkpoint to load op_load_path: ${if:${...op_checkpoint},${...op_checkpoint},${...checkpoint}} # default play with myself num_agents: ${...num_agents} update_win_rate: 0.8 player_pool_length: 4 games_to_check: 400 max_update_steps: 5000 device: ${...rl_device} config: name: Humanoid env_name: rlgpu multi_gpu: False ppo: True mixed_precision: False normalize_input: True normalize_value: True num_actors: ${....task.env.numEnvs} reward_shaper: scale_value: 1 normalize_advantage: True gamma: 0.99 tau: 0.95 learning_rate: 2e-5 lr_schedule: constant score_to_win: 20000000 max_epochs: ${resolve_default:100000,${....max_iterations}} save_best_after: 10 save_frequency: 50 print_stats: True grad_norm: 1.0 entropy_coef: 0.0 truncate_grads: False e_clip: 0.2 horizon_length: 64 minibatch_size: ${resolve_default:64,${....minibatch_size}} mini_epochs: 6 critic_coef: 5 clip_value: False seq_len: 4 bounds_loss_coef: 10 task_reward_w: 0.9 disc_reward_w: 0.1 player: determenistic: False games_num: 4000 record_elo: True init_elo: 400 llc_steps: 5 llc_config: cfg/train/base/ase_humanoid_hrl.yaml llc_checkpoint: tasks/data/models/llc_reallusion_sword_shield.pth ================================================ FILE: timechamber/cfg/train/base/ase_humanoid_hrl.yaml ================================================ params: seed: -1 algo: name: ase model: name: ase network: name: ase separate: True space: continuous: mu_activation: None sigma_activation: None mu_init: name: default sigma_init: name: const_initializer val: -2.9 fixed_sigma: True learn_sigma: False mlp: units: [1024, 1024, 512] activation: relu d2rl: False initializer: name: default regularizer: name: None disc: units: [1024, 1024, 512] 
activation: relu initializer: name: default enc: units: [1024, 512] activation: relu separate: False initializer: name: default load_checkpoint: False config: name: Humanoid env_name: rlgpu multi_gpu: False ppo: True mixed_precision: False normalize_input: True normalize_value: True reward_shaper: scale_value: 1 normalize_advantage: True gamma: 0.99 tau: 0.95 learning_rate: 2e-5 lr_schedule: constant score_to_win: 20000 max_epochs: 100000 save_best_after: 50 save_frequency: 50 print_stats: True grad_norm: 1.0 entropy_coef: 0.0 truncate_grads: False ppo: True e_clip: 0.2 horizon_length: 32 minibatch_size: 1 mini_epochs: 6 critic_coef: 5 clip_value: False seq_len: 4 bounds_loss_coef: 10 amp_obs_demo_buffer_size: 200000 amp_replay_buffer_size: 200000 amp_replay_keep_prob: 0.01 amp_batch_size: 32 amp_minibatch_size: 1 disc_coef: 5 disc_logit_reg: 0.01 disc_grad_penalty: 5 disc_reward_scale: 2 disc_weight_decay: 0.0001 normalize_amp_input: True enable_eps_greedy: False latent_dim: 64 latent_steps_min: 1 latent_steps_max: 150 amp_latent_grad_bonus: 0.00 amp_latent_grad_bonus_max: 100.0 amp_diversity_bonus: 0.01 amp_diversity_tar: 1.0 enc_coef: 5 enc_weight_decay: 0.0000 enc_reward_scale: 1 enc_grad_penalty: 0 task_reward_w: 0.0 disc_reward_w: 0.5 enc_reward_w: 0.5 ================================================ FILE: timechamber/learning/common_agent.py ================================================ # License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE] import copy from datetime import datetime from gym import spaces import numpy as np import os import time import yaml from rl_games.algos_torch import a2c_continuous from rl_games.algos_torch import torch_ext from rl_games.algos_torch import central_value from rl_games.algos_torch.running_mean_std import RunningMeanStd from rl_games.common import a2c_common from rl_games.common import datasets from rl_games.common import schedulers from rl_games.common import vecenv import torch from torch import optim from 
tensorboardX import SummaryWriter class CommonAgent(a2c_continuous.A2CAgent): def __init__(self, base_name, params): a2c_common.A2CBase.__init__(self, base_name, params) config = params['config'] self._load_config_params(config) self.is_discrete = False self._setup_action_space() self.bounds_loss_coef = config.get('bounds_loss_coef', None) self.clip_actions = config.get('clip_actions', True) self.network_path = config.get('network_path', "./runs") self.network_path = os.path.join(self.network_path, self.config['name']) self.network_path = os.path.join(self.network_path, 'nn') net_config = self._build_net_config() self.model = self.network.build(net_config) self.model.to(self.ppo_device) self.states = None self.init_rnn_from_model(self.model) self.last_lr = float(self.last_lr) self.optimizer = optim.Adam(self.model.parameters(), float(self.last_lr), eps=1e-08, weight_decay=self.weight_decay) if self.has_central_value: cv_config = { 'state_shape' : torch_ext.shape_whc_to_cwh(self.state_shape), 'value_size' : self.value_size, 'ppo_device' : self.ppo_device, 'num_agents' : self.num_agents, 'num_steps' : self.horizon_length, 'num_actors' : self.num_actors, 'num_actions' : self.actions_num, 'seq_len' : self.seq_len, 'model' : self.central_value_config['network'], 'config' : self.central_value_config, 'writter' : self.writer, 'multi_gpu' : self.multi_gpu } self.central_value_net = central_value.CentralValueTrain(**cv_config).to(self.ppo_device) self.use_experimental_cv = self.config.get('use_experimental_cv', True) self.algo_observer.after_init(self) return def init_tensors(self): super().init_tensors() self.experience_buffer.tensor_dict['next_obses'] = torch.zeros_like(self.experience_buffer.tensor_dict['obses']) self.experience_buffer.tensor_dict['next_values'] = torch.zeros_like(self.experience_buffer.tensor_dict['values']) self.tensor_list += ['next_obses'] return def train(self): self.init_tensors() self.last_mean_rewards = -100500 start_time = time.time() total_time 
= 0 rep_count = 0 self.frame = 0 self.obs = self.env_reset() self.curr_frames = self.batch_size_envs self.model_output_file = os.path.join(self.network_path, self.config['name']) if self.multi_gpu: self.hvd.setup_algo(self) self._init_train() while True: epoch_num = self.update_epoch() train_info = self.train_epoch() sum_time = train_info['total_time'] total_time += sum_time frame = self.frame if self.multi_gpu: self.hvd.sync_stats(self) if self.rank == 0: scaled_time = sum_time scaled_play_time = train_info['play_time'] curr_frames = self.curr_frames self.frame += curr_frames if self.print_stats: fps_step = curr_frames / scaled_play_time fps_total = curr_frames / scaled_time print(f'fps step: {fps_step:.1f} fps total: {fps_total:.1f}') self.writer.add_scalar('performance/total_fps', curr_frames / scaled_time, frame) self.writer.add_scalar('performance/step_fps', curr_frames / scaled_play_time, frame) self.writer.add_scalar('info/epochs', epoch_num, frame) self._log_train_info(train_info, frame) self.algo_observer.after_print_stats(frame, epoch_num, total_time) if self.game_rewards.current_size > 0: mean_rewards = self.game_rewards.get_mean() mean_lengths = self.game_lengths.get_mean() for i in range(self.value_size): self.writer.add_scalar('rewards/frame'.format(i), mean_rewards[i], frame) self.writer.add_scalar('rewards/iter'.format(i), mean_rewards[i], epoch_num) self.writer.add_scalar('rewards/time'.format(i), mean_rewards[i], total_time) self.writer.add_scalar('episode_lengths/frame', mean_lengths, frame) self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num) if self.has_self_play_config: self.self_play_manager.update(self) if self.save_freq > 0: if (epoch_num % self.save_freq == 0): self.save(self.model_output_file + "_" + str(epoch_num)) if epoch_num > self.max_epochs: self.save(self.model_output_file) print('MAX EPOCHS NUM!') return self.last_mean_rewards, epoch_num update_time = 0 return def train_epoch(self): play_time_start = time.time() 
with torch.no_grad(): if self.is_rnn: batch_dict = self.play_steps_rnn() else: batch_dict = self.play_steps() play_time_end = time.time() update_time_start = time.time() rnn_masks = batch_dict.get('rnn_masks', None) self.set_train() self.curr_frames = batch_dict.pop('played_frames') self.prepare_dataset(batch_dict) self.algo_observer.after_steps() if self.has_central_value: self.train_central_value() train_info = None if self.is_rnn: frames_mask_ratio = rnn_masks.sum().item() / (rnn_masks.nelement()) print(frames_mask_ratio) for _ in range(0, self.mini_epochs_num): ep_kls = [] for i in range(len(self.dataset)): curr_train_info = self.train_actor_critic(self.dataset[i]) print(type(curr_train_info)) if self.schedule_type == 'legacy': if self.multi_gpu: curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls') self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, curr_train_info['kl'].item()) self.update_lr(self.last_lr) if (train_info is None): train_info = dict() for k, v in curr_train_info.items(): train_info[k] = [v] else: for k, v in curr_train_info.items(): train_info[k].append(v) av_kls = torch_ext.mean_list(train_info['kl']) if self.schedule_type == 'standard': if self.multi_gpu: av_kls = self.hvd.average_value(av_kls, 'ep_kls') self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item()) self.update_lr(self.last_lr) if self.schedule_type == 'standard_epoch': if self.multi_gpu: av_kls = self.hvd.average_value(torch_ext.mean_list(kls), 'ep_kls') self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item()) self.update_lr(self.last_lr) update_time_end = time.time() play_time = play_time_end - play_time_start update_time = update_time_end - update_time_start total_time = update_time_end - play_time_start train_info['play_time'] = play_time train_info['update_time'] = 
update_time train_info['total_time'] = total_time self._record_train_batch_info(batch_dict, train_info) return train_info def play_steps(self): self.set_eval() epinfos = [] update_list = self.update_list for n in range(self.horizon_length): self.obs, done_env_ids = self._env_reset_done() self.experience_buffer.update_data('obses', n, self.obs['obs']) if self.use_action_masks: masks = self.vec_env.get_action_masks() res_dict = self.get_masked_action_values(self.obs, masks) else: res_dict = self.get_action_values(self.obs) for k in update_list: self.experience_buffer.update_data(k, n, res_dict[k]) if self.has_central_value: self.experience_buffer.update_data('states', n, self.obs['states']) self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions']) shaped_rewards = self.rewards_shaper(rewards) self.experience_buffer.update_data('rewards', n, shaped_rewards) self.experience_buffer.update_data('next_obses', n, self.obs['obs']) self.experience_buffer.update_data('dones', n, self.dones) terminated = infos['terminate'].float() terminated = terminated.unsqueeze(-1) next_vals = self._eval_critic(self.obs) next_vals *= (1.0 - terminated) self.experience_buffer.update_data('next_values', n, next_vals) self.current_rewards += rewards self.current_lengths += 1 all_done_indices = self.dones.nonzero(as_tuple=False) done_indices = all_done_indices[::self.num_agents] self.game_rewards.update(self.current_rewards[done_indices]) self.game_lengths.update(self.current_lengths[done_indices]) self.algo_observer.process_infos(infos, done_indices) not_dones = 1.0 - self.dones.float() self.current_rewards = self.current_rewards * not_dones.unsqueeze(1) self.current_lengths = self.current_lengths * not_dones mb_fdones = self.experience_buffer.tensor_dict['dones'].float() mb_values = self.experience_buffer.tensor_dict['values'] mb_next_values = self.experience_buffer.tensor_dict['next_values'] mb_rewards = self.experience_buffer.tensor_dict['rewards'] mb_advs = 
self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values) mb_returns = mb_advs + mb_values batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list) batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns) batch_dict['played_frames'] = self.batch_size return batch_dict def calc_gradients(self, input_dict): self.set_train() value_preds_batch = input_dict['old_values'] old_action_log_probs_batch = input_dict['old_logp_actions'] advantage = input_dict['advantages'] old_mu_batch = input_dict['mu'] old_sigma_batch = input_dict['sigma'] return_batch = input_dict['returns'] actions_batch = input_dict['actions'] obs_batch = input_dict['obs'] obs_batch = self._preproc_obs(obs_batch) lr = self.last_lr kl = 1.0 lr_mul = 1.0 curr_e_clip = lr_mul * self.e_clip batch_dict = { 'is_train': True, 'prev_actions': actions_batch, 'obs' : obs_batch } rnn_masks = None if self.is_rnn: rnn_masks = input_dict['rnn_masks'] batch_dict['rnn_states'] = input_dict['rnn_states'] batch_dict['seq_length'] = self.seq_len with torch.cuda.amp.autocast(enabled=self.mixed_precision): res_dict = self.model(batch_dict) action_log_probs = res_dict['prev_neglogp'] values = res_dict['value'] entropy = res_dict['entropy'] mu = res_dict['mu'] sigma = res_dict['sigma'] a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip) a_loss = a_info['actor_loss'] c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value) c_loss = c_info['critic_loss'] b_loss = self.bound_loss(mu) losses, sum_mask = torch_ext.apply_masks([a_loss.unsqueeze(1), c_loss, entropy.unsqueeze(1), b_loss.unsqueeze(1)], rnn_masks) a_loss, c_loss, entropy, b_loss = losses[0], losses[1], losses[2], losses[3] loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss if self.multi_gpu: self.optimizer.zero_grad() else: for param in self.model.parameters(): 
            # Tail of the gradient-update step (the enclosing method begins
            # before this chunk): zero grads, backprop through the AMP scaler,
            # optionally clip, then step the optimizer.
                param.grad = None
        self.scaler.scale(loss).backward()
        #TODO: Refactor this ugliest code of the year
        if self.truncate_grads:
            if self.multi_gpu:
                # Horovod-style optimizer: sync grads across ranks before
                # unscaling/clipping, and skip the second sync on step().
                self.optimizer.synchronize()
                self.scaler.unscale_(self.optimizer)
                nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
                with self.optimizer.skip_synchronize():
                    self.scaler.step(self.optimizer)
                    self.scaler.update()
            else:
                self.scaler.unscale_(self.optimizer)
                nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
                self.scaler.step(self.optimizer)
                self.scaler.update()
        else:
            self.scaler.step(self.optimizer)
            self.scaler.update()

        with torch.no_grad():
            reduce_kl = not self.is_rnn
            kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl)
            if self.is_rnn:
                kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel()  #/ sum_mask

        # Per-minibatch training stats; a_info/c_info contribute the actor and
        # critic loss entries consumed later by _log_train_info.
        self.train_result = {
            'entropy': entropy,
            'kl': kl_dist,
            'last_lr': self.last_lr,
            'lr_mul': lr_mul,
            'b_loss': b_loss
        }
        self.train_result.update(a_info)
        self.train_result.update(c_info)

        return

    def discount_values(self, mb_fdones, mb_values, mb_rewards, mb_next_values):
        """Compute GAE-style advantages over the rollout (time-major tensors).

        NOTE(review): `delta` does not mask `mb_next_values` with (1 - done);
        presumably next_values are already zeroed at termination by the caller
        (play_steps does `next_vals *= (1.0 - terminated)`) — confirm.
        """
        lastgaelam = 0
        mb_advs = torch.zeros_like(mb_rewards)
        for t in reversed(range(self.horizon_length)):
            not_done = 1.0 - mb_fdones[t]
            not_done = not_done.unsqueeze(1)
            delta = mb_rewards[t] + self.gamma * mb_next_values[t] - mb_values[t]
            lastgaelam = delta + self.gamma * self.tau * not_done * lastgaelam
            mb_advs[t] = lastgaelam
        return mb_advs

    def bound_loss(self, mu):
        """Quadratic penalty pushing the action mean back inside [-1, 1]."""
        if self.bounds_loss_coef is not None:
            soft_bound = 1.0
            mu_loss_high = torch.maximum(mu - soft_bound, torch.tensor(0, device=self.ppo_device))**2
            mu_loss_low = torch.minimum(mu + soft_bound, torch.tensor(0, device=self.ppo_device))**2
            b_loss = (mu_loss_low + mu_loss_high).sum(axis=-1)
        else:
            b_loss = 0
        return b_loss

    def _load_config_params(self, config):
        # Cache the configured learning rate; logged as 'last_lr'.
        self.last_lr = config['learning_rate']
        return

    def _build_net_config(self):
        """Assemble the kwargs dict passed to the network builder."""
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        config = {
            'actions_num' : self.actions_num,
            'input_shape' : obs_shape,
            'num_seqs' : self.num_actors * self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value' : self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        return config

    def _setup_action_space(self):
        # Cache action count and clamp bounds on the PPO device.
        action_space = self.env_info['action_space']
        self.actions_num = action_space.shape[0]

        # todo introduce device instead of cuda()
        self.actions_low = torch.from_numpy(action_space.low.copy()).float().to(self.ppo_device)
        self.actions_high = torch.from_numpy(action_space.high.copy()).float().to(self.ppo_device)
        return

    def _init_train(self):
        # Hook for subclasses; intentionally a no-op here.
        return

    def _env_reset_done(self):
        """Reset only the envs flagged done; returns (obs tensors, reset env ids)."""
        obs, done_env_ids = self.vec_env.reset_done()
        return self.obs_to_tensors(obs), done_env_ids

    def _eval_critic(self, obs_dict):
        """Run just the critic head on (optionally normalized) observations."""
        self.model.eval()
        obs = obs_dict['obs']
        processed_obs = self._preproc_obs(obs)
        if self.normalize_input:
            processed_obs = self.model.norm_obs(processed_obs)
        value = self.model.a2c_network.eval_critic(processed_obs)
        if self.normalize_value:
            # unnorm=True: map normalized critic output back to reward scale.
            value = self.value_mean_std(value, True)
        return value

    def _actor_loss(self, old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip):
        """PPO clipped surrogate loss (or plain PG when self.ppo is false).

        Returns a dict with per-sample 'actor_loss' and scalar
        'actor_clip_frac' (None in the non-PPO branch).
        """
        clip_frac = None
        if (self.ppo):
            ratio = torch.exp(old_action_log_probs_batch - action_log_probs)
            surr1 = advantage * ratio
            surr2 = advantage * torch.clamp(ratio, 1.0 - curr_e_clip, 1.0 + curr_e_clip)
            a_loss = torch.max(-surr1, -surr2)

            # Fraction of samples whose ratio left the trust region.
            clipped = torch.abs(ratio - 1.0) > curr_e_clip
            clip_frac = torch.mean(clipped.float())
            clip_frac = clip_frac.detach()
        else:
            a_loss = (action_log_probs * advantage)

        info = {
            'actor_loss': a_loss,
            'actor_clip_frac': clip_frac
        }
        return info

    def _critic_loss(self, value_preds_batch, values, curr_e_clip, return_batch, clip_value):
        """Squared-error value loss, optionally PPO-clipped around old preds."""
        if clip_value:
            value_pred_clipped = value_preds_batch + \
                (values - value_preds_batch).clamp(-curr_e_clip, curr_e_clip)
            value_losses = (values - return_batch)**2
            value_losses_clipped = (value_pred_clipped - return_batch)**2
            c_loss = torch.max(value_losses, value_losses_clipped)
        else:
            c_loss = (return_batch - values)**2

        info = {
            'critic_loss': c_loss
        }
        return info

    def _record_train_batch_info(self, batch_dict, train_info):
        # Hook for subclasses; intentionally a no-op here.
        return

    def _log_train_info(self, train_info, frame):
        """Write per-epoch training metrics to TensorBoard."""
        self.writer.add_scalar('performance/update_time', train_info['update_time'], frame)
        self.writer.add_scalar('performance/play_time', train_info['play_time'], frame)
        self.writer.add_scalar('losses/a_loss', torch_ext.mean_list(train_info['actor_loss']).item(), frame)
        self.writer.add_scalar('losses/c_loss', torch_ext.mean_list(train_info['critic_loss']).item(), frame)

        self.writer.add_scalar('losses/bounds_loss', torch_ext.mean_list(train_info['b_loss']).item(), frame)
        self.writer.add_scalar('losses/entropy', torch_ext.mean_list(train_info['entropy']).item(), frame)
        self.writer.add_scalar('info/last_lr', train_info['last_lr'][-1] * train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/lr_mul', train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/e_clip', self.e_clip * train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/clip_frac', torch_ext.mean_list(train_info['actor_clip_frac']).item(), frame)
        self.writer.add_scalar('info/kl', torch_ext.mean_list(train_info['kl']).item(), frame)
        return


================================================
FILE: timechamber/learning/common_player.py
================================================
# License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE]

import torch

from rl_games.algos_torch import players
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common.player import BasePlayer


class CommonPlayer(players.PpoPlayerContinuous):
    """Evaluation player that builds its own network and supports
    reset-done-style vectorized environments."""

    def __init__(self, params):
        # Deliberately skips PpoPlayerContinuous.__init__ and calls
        # BasePlayer directly, then builds the net itself.
        BasePlayer.__init__(self, params)
        self.network = self.config['network']
        self.normalize_input = self.config['normalize_input']
        self.normalize_value = self.config['normalize_value']

        self._setup_action_space()
        self.mask = [False]

        net_config = self._build_net_config()
        self._build_net(net_config)

        return

    def run(self):
        """Play `games_num * n_game_life` games and print reward statistics."""
        n_games = self.games_num
        render = self.render_env
        n_game_life = self.n_game_life
        is_determenistic = self.is_determenistic
        sum_rewards = 0
        sum_steps = 0
        sum_game_res = 0
        n_games = n_games * n_game_life
        games_played = 0
        has_masks = False
        has_masks_func = getattr(self.env, "has_action_mask", None) is not None

        # NOTE(review): `agent_inited` is assigned but never read.
        op_agent = getattr(self.env, "create_agent", None)
        if op_agent:
            agent_inited = True

        if has_masks_func:
            has_masks = self.env.has_action_mask()

        need_init_rnn = self.is_rnn
        for _ in range(n_games):
            if games_played >= n_games:
                break

            obs_dict = self.env_reset(self.env)
            batch_size = 1
            batch_size = self.get_batch_size(obs_dict['obs'], batch_size)

            if need_init_rnn:
                self.init_rnn()
                need_init_rnn = False

            # Per-env cumulative reward and episode length.
            cr = torch.zeros(batch_size, dtype=torch.float32)
            steps = torch.zeros(batch_size, dtype=torch.float32)

            print_game_res = False

            for n in range(self.max_steps):
                # Reset only envs that finished on the previous step.
                obs_dict, done_env_ids = self._env_reset_done()

                if has_masks:
                    masks = self.env.get_action_mask()
                    action = self.get_masked_action(obs_dict, masks, is_determenistic)
                else:
                    action = self.get_action(obs_dict, is_determenistic)
                obs_dict, r, done, info = self.env_step(self.env, action)
                cr += r
                steps += 1

                self._post_step(info)

                if render:
                    self.env.render(mode = 'human')
                    # NOTE(review): `time` is not imported in this file's
                    # header — this line raises NameError when render=True.
                    time.sleep(self.render_sleep)

                all_done_indices = done.nonzero(as_tuple=False)
                # One logical game spans num_agents env slots.
                done_indices = all_done_indices[::self.num_agents]
                done_count = len(done_indices)
                games_played += done_count

                if done_count > 0:
                    if self.is_rnn:
                        for s in self.states:
                            s[:,all_done_indices,:] = s[:,all_done_indices,:] * 0.0

                    cur_rewards = cr[done_indices].sum().item()
                    cur_steps = steps[done_indices].sum().item()

                    cr = cr * (1.0 - done.float())
                    steps = steps * (1.0 - done.float())
                    sum_rewards += cur_rewards
                    sum_steps += cur_steps

                    game_res = 0.0
                    if isinstance(info, dict):
                        if 'battle_won' in info:
                            print_game_res = True
                            game_res = info.get('battle_won', 0.5)
                        if 'scores' in info:
                            print_game_res = True
                            game_res = info.get('scores', 0.5)
                    if self.print_stats:
                        if print_game_res:
                            print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count, 'w:', game_res)
                        else:
                            print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count)

                    sum_game_res += game_res
                    if batch_size//self.num_agents == 1 or games_played >= n_games:
                        break

        print(sum_rewards)
        if print_game_res:
            print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life, 'winrate:', sum_game_res / games_played * n_game_life)
        else:
            print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life)

        return

    def obs_to_torch(self, obs):
        # Wrap the base conversion result in the {'obs': ...} dict shape
        # this player uses everywhere.
        obs = super().obs_to_torch(obs)
        obs_dict = {
            'obs': obs
        }
        return obs_dict

    def get_action(self, obs_dict, is_determenistic = False):
        # Unwrap the dict before delegating to the base implementation.
        output = super().get_action(obs_dict['obs'], is_determenistic)
        return output

    def _build_net(self, config):
        """Build the policy network and put it in eval mode."""
        self.model = self.network.build(config)
        self.model.to(self.device)
        self.model.eval()
        self.is_rnn = self.model.is_rnn()
        return

    def _env_reset_done(self):
        """Reset only the envs flagged done; returns (obs dict, reset env ids)."""
        obs, done_env_ids = self.env.reset_done()
        return self.obs_to_torch(obs), done_env_ids

    def _post_step(self, info):
        # Hook for subclasses; intentionally a no-op here.
        return

    def _build_net_config(self):
        """Assemble the kwargs dict passed to the network builder."""
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        config = {
            'actions_num' : self.actions_num,
            'input_shape' : obs_shape,
            'num_seqs' : self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        return config

    def _setup_action_space(self):
        # Cache action count and clamp bounds on the player device.
        self.actions_num = self.action_space.shape[0]
        self.actions_low = torch.from_numpy(self.action_space.low.copy()).float().to(self.device)
        self.actions_high = torch.from_numpy(self.action_space.high.copy()).float().to(self.device)
        return


================================================
FILE: timechamber/learning/hrl_sp_agent.py
================================================
import copy
from collections import OrderedDict
from datetime import datetime
from gym import spaces
import numpy as np
import os
import time

from .pfsp_player_pool import PFSPPlayerPool, SinglePlayer, PFSPPlayerThreadPool, PFSPPlayerProcessPool, \
    PFSPPlayerVectorizedPool
from rl_games.common.a2c_common import swap_and_flatten01
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import central_value
from isaacgym.torch_utils import *

import torch
from torch import optim
from tensorboardX import SummaryWriter
import torch.distributed as dist

import timechamber.ase.hrl_agent as hrl_agent
from timechamber.utils.utils import load_check, load_checkpoint


class HRLSPAgent(hrl_agent.HRLAgent):
    """Hierarchical RL agent trained by self-play against a PFSP pool of
    frozen past opponents."""

    def __init__(self, base_name, params):
        params['config']['device'] = params['device']
        super().__init__(base_name, params)
        self.player_pool_type = params['player_pool_type']
        # Config used to instantiate opponent copies of the policy network.
        self.base_model_config = {
            'actions_num': self.actions_num,
            'input_shape': self.obs_shape,
            'num_seqs': self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        self.max_his_player_num = params['player_pool_length']
        # Either start the opponent from a checkpoint, or from the live model.
        if params['op_load_path']:
            self.init_op_model = self.create_model()
            self.restore_op(params['op_load_path'])
        else:
            self.init_op_model = self.model
        self.players_dir = os.path.join(self.experiment_dir, 'policy_dir')
        os.makedirs(self.players_dir, exist_ok=True)
        self.update_win_rate = params['update_win_rate']
        self.num_opponent_agents = params['num_agents'] - 1
        self.player_pool = self._build_player_pool(params)

        self.games_to_check = params['games_to_check']
        self.now_update_steps = 0
        self.max_update_steps = params['max_update_steps']
        self.update_op_num = 0

        # Seed the pool with the initial opponent and assign every env to it.
        self.update_player_pool(self.init_op_model, player_idx=self.update_op_num)
        self.resample_op(torch.arange(end=self.num_actors, device=self.device, dtype=torch.long))
        assert self.num_actors % self.max_his_player_num == 0

    def _build_player_pool(self, params):
        """Create the opponent pool; vectorized pools batch all opponents
        through one stacked model."""
        if self.player_pool_type == 'vectorized':
            vector_model_config = self.base_model_config
            vector_model_config['num_envs'] = self.num_actors * self.num_opponent_agents
            vector_model_config['population_size'] = self.max_his_player_num
            return PFSPPlayerVectorizedPool(max_length=self.max_his_player_num, device=self.device,
                                            vector_model_config=vector_model_config, params=params)
        else:
            return PFSPPlayerPool(max_length=self.max_his_player_num, device=self.device)

    def play_steps(self):
        """Collect one horizon of experience, stepping ego and opponent
        policies jointly, and return the flattened batch dict."""
        self.set_eval()
        env_done_indices = torch.tensor([], device=self.device, dtype=torch.long)
        update_list = self.update_list
        step_time = 0.0
        for n in range(self.horizon_length):
            # Reset only the envs that finished last step.
            self.obs = self.env_reset(env_done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])

            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                # NOTE(review): `res_dict_op` is only defined on this branch,
                # but is used unconditionally below — the masked path would
                # raise NameError. Confirm use_action_masks is always False here.
                res_dict_op = self.get_action_values(self.obs, is_op=True)
                res_dict = self.get_action_values(self.obs)
            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])
            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])
            if self.player_pool_type == 'multi_thread':
                self.player_pool.thread_pool.shutdown()
            step_time_start = time.time()
            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'], res_dict_op['actions'])
            step_time_end = time.time()
            step_time += (step_time_end - step_time_start)
            shaped_rewards = self.rewards_shaper(rewards)
            if self.value_bootstrap and 'time_outs' in infos:
                # Bootstrap value at timeout terminations.
                shaped_rewards += self.gamma * res_dict['values'] * self.cast_obs(infos['time_outs']).unsqueeze(
                    1).float()
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)
            self.experience_buffer.update_data('disc_rewards', n, infos['disc_rewards'])
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            # Next-state values, zeroed at true terminations (used by
            # discount_values, which does not mask them again).
            next_vals = self._eval_critic(self.obs)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)

            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            # A game counts as done only when all agent slots of the env are done.
            env_done_indices = self.dones.view(self.num_actors, self.num_agents).all(dim=1).nonzero(as_tuple=False)
            self.game_rewards.update(self.current_rewards[env_done_indices])
            self.game_lengths.update(self.current_lengths[env_done_indices])
            self.algo_observer.process_infos(infos, env_done_indices)
            not_dones = 1.0 - self.dones.float()
            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones
            # Record win/lose/draw per opponent, then re-draw opponents for
            # the envs that just finished.
            self.player_pool.update_player_metric(infos=infos)
            self.resample_op(all_done_indices.flatten())
            env_done_indices = env_done_indices[:, 0]

        last_values = self.get_values(self.obs)
        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        mb_disc_rewards = self.experience_buffer.tensor_dict['disc_rewards']
        # Blend task reward with the AMP discriminator reward.
        mb_rewards = self._combine_rewards(mb_rewards, mb_disc_rewards)
        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values
        batch_dict = self.experience_buffer.get_transformed_list(swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size
        batch_dict['step_time'] = step_time
        return batch_dict

    def env_step(self, ego_actions, op_actions):
        """Advance the env by one high-level step (= `_llc_steps` low-level
        controller steps), averaging rewards and OR-ing event flags."""
        ego_actions = self.preprocess_actions(ego_actions)
        op_actions = self.preprocess_actions(op_actions)
        obs = self.obs['obs']
        obs_op = self.obs['obs_op']
        rewards = 0.0
        disc_rewards = 0.0
        done_count = 0.0
        terminate_count = 0.0
        win_count = 0.0
        lose_count = 0.0
        draw_count = 0.0
        for t in range(self._llc_steps):
            llc_ego_actions = self._compute_llc_action(obs, ego_actions)
            llc_op_actions = self._compute_llc_action(obs_op, op_actions)
            # Ego envs occupy the first num_actors slots, opponents the rest.
            llc_actions = torch.cat((llc_ego_actions, llc_op_actions), dim=0)
            obs_dict, curr_rewards, curr_dones, infos = self.vec_env.step(llc_actions)
            rewards += curr_rewards
            done_count += curr_dones
            terminate_count += infos['terminate']
            win_count += infos['win']
            lose_count += infos['lose']
            draw_count += infos['draw']
            amp_obs = infos['amp_obs']
            curr_disc_reward = self._calc_disc_reward(amp_obs)
            disc_rewards += curr_disc_reward
            obs = obs_dict['obs'][:self.num_actors]
            obs_op = obs_dict['obs'][self.num_actors:]
        rewards /= self._llc_steps
        disc_rewards /= self._llc_steps
        # A flag is set for the macro step if it fired on any LLC sub-step.
        dones = torch.zeros_like(done_count)
        dones[done_count > 0] = 1.0
        terminate = torch.zeros_like(terminate_count)
        terminate[terminate_count > 0] = 1.0
        infos['terminate'] = terminate
        infos['disc_rewards'] = disc_rewards
        wins = torch.zeros_like(win_count)
        wins[win_count > 0] = 1.0
        infos['win'] = wins
        loses = torch.zeros_like(lose_count)
        loses[lose_count > 0] = 1.0
        infos['lose'] = loses
        draws = torch.zeros_like(draw_count)
        draws[draw_count > 0] = 1.0
        infos['draw'] = draws
        obs_dict = {}
        obs_dict['obs'] = obs
        obs_dict['obs_op'] = obs_op
        if self.is_tensor_obses:
            if self.value_size == 1:
                rewards = rewards.unsqueeze(1)
            return self.obs_to_tensors(obs_dict), rewards.to(self.ppo_device), dones.to(self.ppo_device), infos
        else:
            if self.value_size == 1:
                rewards = np.expand_dims(rewards, axis=1)
            return self.obs_to_tensors(obs_dict), torch.from_numpy(rewards).to(self.ppo_device).float(), torch.from_numpy(dones).to(self.ppo_device), infos

    def env_reset(self, env_ids=None):
        """Reset (a subset of) envs and split the stacked obs into ego
        ('obs') and opponent ('obs_op') halves."""
        obs = self.vec_env.reset(env_ids)
        obs = self.obs_to_tensors(obs)
        # Order matters: slice the opponent half before overwriting 'obs'.
        obs['obs_op'] = obs['obs'][self.num_actors:]
        obs['obs'] = obs['obs'][:self.num_actors]
        return obs

    def train(self):
        """Main training loop: epochs of train_epoch() plus logging,
        checkpointing, opponent-pool metric updates and exit handling."""
        self.init_tensors()
        self.mean_rewards = self.last_mean_rewards = -100500
        start_time = time.time()
        total_time = 0
        rep_count = 0
        # self.frame = 0  # loading from checkpoint
        self.obs = self.env_reset()

        if self.multi_gpu:
            torch.cuda.set_device(self.rank)
            print("====================broadcasting parameters")
            model_params = [self.model.state_dict()]
            dist.broadcast_object_list(model_params, 0)
            self.model.load_state_dict(model_params[0])

        self._init_train()

        while True:
            epoch_num = self.update_epoch()
            train_info = self.train_epoch()
            print(f"epoch num: {epoch_num}")
            sum_time = train_info['total_time']
            step_time = train_info['step_time']
            play_time = train_info['play_time']
            update_time = train_info['update_time']
            a_losses = train_info['actor_loss']
            c_losses = train_info['critic_loss']
            entropies = train_info['entropy']
            kls = train_info['kl']
            last_lr = train_info['last_lr'][-1]
            lr_mul = train_info['lr_mul'][-1]

            # cleaning memory to optimize space
            self.dataset.update_values_dict(None)
            total_time += sum_time
            curr_frames = self.curr_frames * self.rank_size if self.multi_gpu else self.curr_frames
            self.frame += curr_frames
            should_exit = False

            # Only rank 0 logs, saves and decides on pool updates.
            if self.rank == 0:
                self.diagnostics.epoch(self, current_epoch=epoch_num)
                scaled_time = self.num_agents * sum_time
                scaled_play_time = self.num_agents * play_time

                frame = self.frame // self.num_agents

                if self.print_stats:
                    step_time = max(step_time, 1e-6)
                    fps_step = curr_frames / step_time
                    fps_step_inference = curr_frames / scaled_play_time
                    fps_total = curr_frames / scaled_time
                    print(
                        f'fps step: {fps_step:.0f} fps step and policy inference: {fps_step_inference:.0f} fps total: {fps_total:.0f} epoch: {epoch_num}/{self.max_epochs}')

                self.write_stats(total_time, epoch_num, step_time, play_time, update_time, a_losses, c_losses,
                                 entropies, kls, last_lr, lr_mul, frame, scaled_time, scaled_play_time, curr_frames)

                self.algo_observer.after_print_stats(frame, epoch_num, total_time)

                if self.game_rewards.current_size > 0:
                    mean_rewards = self.game_rewards.get_mean()
                    mean_lengths = self.game_lengths.get_mean()
                    self.mean_rewards = mean_rewards[0]

                    for i in range(self.value_size):
                        rewards_name = 'rewards' if i == 0 else 'rewards{0}'.format(i)
                        self.writer.add_scalar(rewards_name + '/step'.format(i), mean_rewards[i], frame)
                        self.writer.add_scalar(rewards_name + '/iter'.format(i), mean_rewards[i], epoch_num)
                        self.writer.add_scalar(rewards_name + '/time'.format(i), mean_rewards[i], total_time)

                    self.writer.add_scalar('episode_lengths/step', mean_lengths, frame)
                    self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num)
                    self.writer.add_scalar('episode_lengths/time', mean_lengths, total_time)

                    # removed equal signs (i.e. "rew=") from the checkpoint name since it messes with hydra CLI parsing
                    checkpoint_name = self.config['name'] + '_ep_' + str(epoch_num) + '_rew_' + str(mean_rewards[0])

                    if self.save_freq > 0:
                        # NOTE(review): saving 'last_' only when rewards did NOT
                        # improve (<=) looks inverted vs. the usual rl_games
                        # periodic save — confirm intent.
                        if (epoch_num % self.save_freq == 0) and (mean_rewards <= self.last_mean_rewards):
                            self.save(os.path.join(self.nn_dir, 'last_' + checkpoint_name))

                    if mean_rewards[0] > self.last_mean_rewards and epoch_num >= self.save_best_after:
                        print('saving next best rewards: ', mean_rewards)
                        self.last_mean_rewards = mean_rewards[0]
                        self.save(os.path.join(self.nn_dir, self.config['name']))

                        if 'score_to_win' in self.config:
                            if self.last_mean_rewards > self.config['score_to_win']:
                                print('Network won!')
                                self.save(os.path.join(self.nn_dir, checkpoint_name))
                                should_exit = True

                if epoch_num >= self.max_epochs:
                    if self.game_rewards.current_size == 0:
                        print('WARNING: Max epochs reached before any env terminated at least once')
                        mean_rewards = -np.inf
                    self.save(os.path.join(self.nn_dir,
                                           'last_' + self.config['name'] + 'ep' + str(epoch_num) + 'rew' + str(
                                               mean_rewards)))
                    print('MAX EPOCHS NUM!')
                    should_exit = True
                self.update_metric()
                update_time = 0

            if self.multi_gpu:
                should_exit_t = torch.tensor(should_exit, device=self.device).float()
                dist.broadcast(should_exit_t, 0)
                should_exit = should_exit_t.bool().item()
            if should_exit:
                return self.last_mean_rewards, epoch_num

    def update_metric(self):
        """Aggregate per-opponent win rates and maybe snapshot a new opponent."""
        tot_win_rate = 0
        tot_games_num = 0
        self.now_update_steps += 1
        # self_player process
        for player in self.player_pool.players:
            win_rate = player.win_rate()
            games = player.games_num()
            self.writer.add_scalar(f'rate/win_rate_player_{player.player_idx}', win_rate, self.epoch_num)
            tot_win_rate += win_rate * games
            tot_games_num += games
        # NOTE(review): tot_games_num can be 0 early in training (decayed /
        # cleared metrics) which would make this divide raise — confirm.
        win_rate = tot_win_rate / tot_games_num
        if tot_games_num > self.games_to_check:
            self.check_update_opponent(win_rate)
        self.writer.add_scalar('rate/win_rate', win_rate, self.epoch_num)

    def get_action_values(self, obs, is_op=False):
        """Run inference: the live model for ego, the opponent pool for op.

        Opponent results are scatter-written into preallocated zero tensors
        by the pool, one slice per assigned env."""
        processed_obs = self._preproc_obs(obs['obs_op'] if is_op else obs['obs'])
        if not is_op:
            self.model.eval()
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs': processed_obs,
            'rnn_states': self.rnn_states
        }

        with torch.no_grad():
            if is_op:
                res_dict = {
                    "actions": torch.zeros((self.num_actors * self.num_opponent_agents, self.actions_num),
                                           device=self.device),
                    "values": torch.zeros((self.num_actors * self.num_opponent_agents, 1), device=self.device)
                }
                self.player_pool.inference(input_dict, res_dict, processed_obs)
            else:
                res_dict = self.model(input_dict)
                if self.has_central_value:
                    states = obs['states']
                    input_dict = {
                        'is_train': False,
                        'states': states,
                    }
                    value = self.get_central_value(input_dict)
                    res_dict['values'] = value
        return res_dict

    def restore(self, fn):
        """Load full training state from checkpoint `fn`."""
        checkpoint = load_checkpoint(fn, device=self.device)
        checkpoint = load_check(checkpoint=checkpoint, normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        self.set_full_state_weights(checkpoint)

    def resample_op(self, resample_indices):
        """Re-draw an opponent (PFSP-weighted) for every env in
        `resample_indices`, for each opponent slot."""
        for op_idx in range(self.num_opponent_agents):
            for player in self.player_pool.players:
                player.remove_envs(resample_indices + op_idx * self.num_actors)
        for op_idx in range(self.num_opponent_agents):
            for env_idx in resample_indices:
                player = self.player_pool.sample_player()
                player.add_envs(env_idx + op_idx * self.num_actors)
        # Rebuild each player's cached env index list.
        for player in self.player_pool.players:
            player.reset_envs()

    def resample_batch(self):
        """Reassign opponents to envs in contiguous chunks of num_actors/32."""
        env_indices = torch.arange(end=self.num_actors * self.num_opponent_agents, device=self.device,
                                   dtype=torch.long, requires_grad=False)
        step = self.num_actors // 32
        for player in self.player_pool.players:
            player.clear_envs()
        for i in range(0, self.num_actors, step):
            player = self.player_pool.sample_player()
            player.add_envs(env_indices[i:i + step])
        print("resample done")

    def restore_op(self, fn):
        """Load only model weights (and obs normalizer) into the initial
        opponent model."""
        checkpoint = load_checkpoint(fn, device=self.device)
        checkpoint = load_check(checkpoint, normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        self.init_op_model.load_state_dict(checkpoint['model'])
        if self.normalize_input and 'running_mean_std' in checkpoint:
            self.init_op_model.running_mean_std.load_state_dict(checkpoint['running_mean_std'])

    def check_update_opponent(self, win_rate):
        """Snapshot the current policy into the pool when we beat the pool
        often enough, or after max_update_steps epochs without an update."""
        if win_rate > self.update_win_rate or self.now_update_steps > self.max_update_steps:
            print(f'winrate:{win_rate},add opponent to player pool')
            self.update_op_num += 1
            self.now_update_steps = 0
            self.update_player_pool(self.model, player_idx=self.update_op_num)
            self.player_pool.clear_player_metric()
            self.resample_op(torch.arange(end=self.num_actors, device=self.device, dtype=torch.long))
            self.save(os.path.join(self.players_dir, f'policy_{self.update_op_num}'))

    def create_model(self):
        """Build a fresh policy network on the agent device."""
        model = self.network.build(self.base_model_config)
        model.to(self.device)
        return model

    def update_player_pool(self, model, player_idx):
        """Deep-copy `model` into a frozen SinglePlayer and add it to the pool."""
        new_model = self.create_model()
        new_model.load_state_dict(copy.deepcopy(model.state_dict()))
        if hasattr(model, 'running_mean_std'):
            new_model.running_mean_std.load_state_dict(copy.deepcopy(model.running_mean_std.state_dict()))
        player = SinglePlayer(player_idx, new_model, self.device, self.num_actors * self.num_opponent_agents)
        self.player_pool.add_player(player)


================================================
FILE: timechamber/learning/hrl_sp_player.py
================================================
# License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE]

import os
import time
import torch
import numpy as np
from rl_games.algos_torch import players
import random
from rl_games.algos_torch import torch_ext
from rl_games.common.tr_helpers import unsqueeze_obs
from timechamber.ase import hrl_players
from timechamber.utils.utils import load_check, load_checkpoint
from .pfsp_player_pool import PFSPPlayerPool, PFSPPlayerVectorizedPool, PFSPPlayerThreadPool, PFSPPlayerProcessPool, \
    SinglePlayer
import matplotlib.pyplot as plt
from multielo import MultiElo


class HRLSPPlayer(hrl_players.HRLPlayer):
    """Evaluation player that pits checkpoint pools against each other and
    optionally tracks multi-player Elo ratings."""

    def __init__(self, params):
        params['config']['device_name'] = params['device']
        super().__init__(params)
        print(f'params:{params}')
        self.network = self.config['network']
        self.mask = [False]
        self.is_rnn = False
        self.normalize_input = self.config['normalize_input']
        self.normalize_value = self.config.get('normalize_value', False)
        # Config used to instantiate each loaded checkpoint's network.
        self.base_model_config = {
            'actions_num': self.actions_num,
            'input_shape': self.obs_shape,
            'num_seqs': self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        # Checkpoint mtimes, later normalized to days since the first one.
        self.policy_timestep = []
        self.policy_op_timestep = []
        self.params = params
        self.record_elo = self.player_config.get('record_elo', False)
        self.init_elo = self.player_config.get('init_elo', 400)
        self.num_actors = params['config']['num_actors']
        self.player_pool_type = params['player_pool_type']
        self.player_pool = None
        self.op_player_pool = None
        self.num_opponents = params['num_agents'] - 1
        self.max_steps = 1000
        self.update_op_num = 0
        # players_per_env[i] = [ego player, opponent players...] for env i.
        self.players_per_env = []
        self.elo = MultiElo()

    def restore(self, load_dir):
        """Load the ego pool from a checkpoint file or a directory of
        checkpoints (sorted by file mtime), then the opponent pool."""
        if os.path.isdir(load_dir):
            self.player_pool = self._build_player_pool(params=self.params, player_num=len(os.listdir(load_dir)))
            print('dir:', load_dir)
            sorted_players = []
            for idx, policy_check_checkpoint in enumerate(os.listdir(load_dir)):
                model_timestep = os.path.getmtime(load_dir + '/' + str(policy_check_checkpoint))
                self.policy_timestep.append(model_timestep)
                model = self.load_model(load_dir + '/' + str(policy_check_checkpoint))
                # player_idx temporarily holds the mtime for sorting below.
                new_player = SinglePlayer(player_idx=model_timestep, model=model, device=self.device,
                                          rating=self.init_elo,
                                          obs_batch_len=self.num_actors * self.num_opponents)
                sorted_players.append(new_player)
            sorted_players.sort(key=lambda player: player.player_idx)
            for idx, player in enumerate(sorted_players):
                player.player_idx = idx
                self.player_pool.add_player(player)
            self.policy_timestep.sort()
        else:
            self.player_pool = self._build_player_pool(params=self.params, player_num=1)
            self.policy_timestep.append(os.path.getmtime(load_dir))
            model = self.load_model(load_dir)
            new_player = SinglePlayer(player_idx=0, model=model, device=self.device, rating=self.init_elo,
                                      obs_batch_len=self.num_actors * self.num_opponents)
            self.player_pool.add_player(new_player)
        self.restore_op(self.params['op_load_path'])
        self._norm_policy_timestep()
        self._alloc_env_indices()

    def restore_op(self, load_dir):
        """Load the opponent pool; mirrors restore() for the op side."""
        if os.path.isdir(load_dir):
            self.op_player_pool = self._build_player_pool(params=self.params, player_num=len(os.listdir(load_dir)))
            sorted_players = []
            for idx, policy_check_checkpoint in enumerate(os.listdir(load_dir)):
                model_timestep = os.path.getmtime(load_dir + '/' + str(policy_check_checkpoint))
                self.policy_op_timestep.append(model_timestep)
                model = self.load_model(load_dir + '/' + str(policy_check_checkpoint))
                new_player = SinglePlayer(player_idx=model_timestep, model=model, device=self.device,
                                          rating=self.init_elo,
                                          obs_batch_len=self.num_actors * self.num_opponents)
                sorted_players.append(new_player)
            sorted_players.sort(key=lambda player: player.player_idx)
            for idx, player in enumerate(sorted_players):
                player.player_idx = idx
                self.op_player_pool.add_player(player)
            self.policy_op_timestep.sort()
        else:
            self.op_player_pool = self._build_player_pool(params=self.params, player_num=1)
            self.policy_op_timestep.append(os.path.getmtime(load_dir))
            model = self.load_model(load_dir)
            # NOTE(review): hard-coded 400 here instead of self.init_elo,
            # unlike every other branch — confirm intent.
            new_player = SinglePlayer(player_idx=0, model=model, device=self.device, rating=400,
                                      obs_batch_len=self.num_actors * self.num_opponents)
            self.op_player_pool.add_player(new_player)

    def _alloc_env_indices(self):
        """Randomly assign an ego player and opponent players to every env,
        recording the pairing in players_per_env."""
        for idx in range(self.num_actors):
            player_idx = random.randint(0, len(self.player_pool.players) - 1)
            self.player_pool.players[player_idx].add_envs(torch.tensor([idx], dtype=torch.long, device=self.device))
            env_player = [self.player_pool.players[player_idx]]
            for op_idx in range(self.num_opponents):
                op_player_idx = random.randint(0, len(self.op_player_pool.players) - 1)
                self.op_player_pool.players[op_player_idx].add_envs(
                    torch.tensor([idx + op_idx * self.num_actors], dtype=torch.long, device=self.device))
                env_player.append(self.op_player_pool.players[op_player_idx])
            self.players_per_env.append(env_player)
        for player in self.player_pool.players:
            player.reset_envs()
        for player in self.op_player_pool.players:
            player.reset_envs()

    def _build_player_pool(self, params, player_num):
        """Create a pool of the configured type sized for `player_num` models.

        NOTE(review): the 'multi_thread' key returns the *Process* pool and
        'multi_process' returns the *Thread* pool — these look swapped.
        """
        if self.player_pool_type == 'multi_thread':
            return PFSPPlayerProcessPool(max_length=player_num, device=self.device)
        elif self.player_pool_type == 'multi_process':
            return PFSPPlayerThreadPool(max_length=player_num, device=self.device)
        elif self.player_pool_type == 'vectorized':
            vector_model_config = self.base_model_config
            vector_model_config['num_envs'] = self.num_actors * self.num_opponents
            vector_model_config['population_size'] = player_num
            return PFSPPlayerVectorizedPool(max_length=player_num, device=self.device,
                                            vector_model_config=vector_model_config, params=params)
        else:
            return PFSPPlayerPool(max_length=player_num, device=self.device)

    def _update_rating(self, info, env_indices):
        """Update Elo ratings for the players of each finished env.

        Two-player games use win/lose/draw flags; multi-player games use the
        per-env 'ranks' tensor with MultiElo's result_order."""
        for env_idx in env_indices:
            if self.num_opponents == 1:
                player = self.players_per_env[env_idx][0]
                op_player = self.players_per_env[env_idx][1]
                if info['win'][env_idx]:
                    player.rating, op_player.rating = self.elo.get_new_ratings([player.rating, op_player.rating])
                elif info['lose'][env_idx]:
                    op_player.rating, player.rating = self.elo.get_new_ratings([op_player.rating, player.rating])
                elif info['draw'][env_idx]:
                    # result_order=[1, 1] encodes a tie.
                    player.rating, op_player.rating = self.elo.get_new_ratings([player.rating, op_player.rating],
                                                                               result_order=[1, 1])
            else:
                ranks = info['ranks'][env_idx].cpu().numpy()
                players_sorted_by_rank = sorted(enumerate(self.players_per_env[env_idx]), key=lambda x: ranks[x[0]])
                sorted_ranks = sorted(ranks)
                now_ratings = [player.rating for idx, player in players_sorted_by_rank]
                new_ratings = self.elo.get_new_ratings(now_ratings, result_order=sorted_ranks)
                for idx, new_rating in enumerate(new_ratings):
                    players_sorted_by_rank[idx][1].rating = new_rating

    def run(self):
        """Play out evaluation games between the two pools, print reward
        stats, and plot the Elo curve when record_elo is set."""
        n_games = self.games_num
        render = self.render_env
        n_game_life = self.n_game_life
        is_determenistic = self.is_determenistic
        sum_rewards = 0
        sum_steps = 0
        sum_game_res = 0
        n_games = n_games * n_game_life
        games_played = 0
        has_masks = False
        has_masks_func = getattr(self.env, "has_action_mask", None) is not None
        if has_masks_func:
            has_masks = self.env.has_action_mask()
        print(f'games_num:{n_games}')
        need_init_rnn = self.is_rnn
        for _ in range(n_games):
            if games_played >= n_games:
                break
            obses = self.env_reset(self.env)
            batch_size = 1
            batch_size = self.get_batch_size(obses['obs'], batch_size)
            if need_init_rnn:
                self.init_rnn()
                need_init_rnn = False
            cr = torch.zeros(batch_size, dtype=torch.float32, device=self.device)
            steps = torch.zeros(batch_size, dtype=torch.float32, device=self.device)
            print_game_res = False
            done_indices = torch.tensor([], device=self.device, dtype=torch.long)
            for n in range(self.max_steps):
                obses = self.env_reset(self.env, done_indices)
                if has_masks:
                    masks = self.env.get_action_mask()
                    action = self.get_masked_action(obses, masks, is_determenistic)
                else:
                    action = self.get_action(obses['obs'], is_determenistic)
                    # NOTE(review): action_op is only defined on this branch
                    # but used unconditionally below (same pattern as the
                    # agent's play_steps) — masked path would raise NameError.
                    action_op = self.get_action(obses['obs_op'], is_determenistic, is_op=True)
                obses, r, done, info = self.env_step(self.env, obses, action, action_op)
                cr += r
                steps += 1
                if render:
                    self.env.render(mode='human')
                    time.sleep(self.render_sleep)
                all_done_indices = done.nonzero(as_tuple=False)
                done_indices = all_done_indices[::self.num_agents]
                done_count = len(done_indices)
                games_played += done_count
                if self.record_elo:
                    self._update_rating(info, all_done_indices.flatten())
                if done_count > 0:
                    if self.is_rnn:
                        for s in self.states:
                            s[:, all_done_indices, :] = s[:, all_done_indices, :] * 0.0
                    cur_rewards = cr[done_indices].sum().item()
                    cur_steps = steps[done_indices].sum().item()
                    cr = cr * (1.0 - done.float())
                    steps = steps * (1.0 - done.float())
                    sum_rewards += cur_rewards
                    sum_steps += cur_steps
                    game_res = 0.0
                    if isinstance(info, dict):
                        if 'battle_won' in info:
                            print_game_res = True
                            game_res = info.get('battle_won', 0.5)
                        if 'scores' in info:
                            print_game_res = True
                            game_res = info.get('scores', 0.5)
                    if self.print_stats:
                        if print_game_res:
                            print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count, 'w:',
                                  game_res)
                        else:
                            print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count)
                    sum_game_res += game_res
                    if batch_size // self.num_agents == 1 or games_played >= n_games:
                        break
                done_indices = done_indices[:, 0]
        if self.record_elo:
            self._plot_elo_curve()

    def _plot_elo_curve(self):
        """Plot rating vs. checkpoint age (days) for both pools and save to
        <load_path>/../elo.jpg."""
        x = np.array(self.policy_timestep)
        # NOTE(review): np.arange yields int arrays, so float ratings assigned
        # below are truncated — a float dtype was probably intended.
        y = np.arange(len(self.player_pool.players))
        x_op = np.array(self.policy_op_timestep)
        y_op = np.arange(len(self.op_player_pool.players))
        for player in self.player_pool.players:
            idx = player.player_idx
            y[idx] = player.rating
        for player in self.op_player_pool.players:
            idx = player.player_idx
            y_op[idx] = player.rating
        if self.params['load_path'] != self.params['op_load_path']:
            l1 = plt.plot(x, y, 'b--', label='policy')
            l2 = plt.plot(x_op, y_op, 'r--', label='policy_op')
            plt.plot(x, y, 'b^-', x_op, y_op, 'ro-')
        else:
            # Same pool on both sides: draw a single curve.
            l1 = plt.plot(x, y, 'b--', label='policy')
            plt.plot(x, y, 'b^-')
        plt.title('ELO Curve')
        plt.xlabel('timestep/days')
        plt.ylabel('ElO')
        plt.legend()
        plt.savefig(self.params['load_path'] + '/../elo.jpg')

    def get_action(self, obs, is_determenistic=False, is_op=False):
        """Pool-routed inference: each pool scatter-writes actions/mus/values
        for its assigned envs; returns clamped actions (mus if deterministic)."""
        if self.has_batch_dimension == False:
            obs = unsqueeze_obs(obs)
        obs = self._preproc_obs(obs)
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs': obs,
            'rnn_states': self.states
        }
        with torch.no_grad():
            data_len = self.num_actors * self.num_opponents if is_op else self.num_actors
            res_dict = {
                "actions": torch.zeros((data_len, self.actions_num), device=self.device),
                "values": torch.zeros((data_len, 1), device=self.device),
                "mus": torch.zeros((data_len, self.actions_num), device=self.device)
            }
            if is_op:
                self.op_player_pool.inference(input_dict, res_dict, obs)
            else:
                self.player_pool.inference(input_dict, res_dict, obs)
        mu = res_dict['mus']
        action = res_dict['actions']
        if is_determenistic:
            current_action = mu
        else:
            current_action = action
        current_action = torch.squeeze(current_action.detach())
        return torch.clamp(current_action, -1.0, 1.0)

    def _norm_policy_timestep(self):
        """Convert checkpoint mtimes into days elapsed since the oldest one."""
        self.policy_op_timestep.sort()
        self.policy_timestep.sort()
        # Index 0 is the reference point; it is zeroed after the loops.
        for idx in range(1, len(self.policy_op_timestep)):
            self.policy_op_timestep[idx] -= self.policy_op_timestep[0]
            self.policy_op_timestep[idx] /= 3600 * 24
        for idx in range(1, len(self.policy_timestep)):
            self.policy_timestep[idx] -= self.policy_timestep[0]
            self.policy_timestep[idx] /= 3600 * 24
        self.policy_timestep[0] = 0
        if len(self.policy_op_timestep):
            self.policy_op_timestep[0] = 0

    def env_reset(self, env, env_ids=None):
        """Reset envs and split stacked obs into ego/op halves."""
        obs = env.reset(env_ids)
        obs_dict = {}
        obs_dict['obs_op'] = obs[self.num_actors:]
        obs_dict['obs'] = obs[:self.num_actors]
        return obs_dict

    def env_step(self, env, obs_dict, ego_actions, op_actions):
        """One high-level step: run `_llc_steps` low-level steps, averaging
        rewards and OR-ing win/lose/draw/terminate flags (mirrors the agent's
        env_step, but extracts a scalar disc reward for logging)."""
        obs = obs_dict['obs']
        obs_op = obs_dict['obs_op']
        rewards = 0.0
        done_count = 0.0
        disc_rewards = 0.0
        terminate_count = 0.0
        win_count = 0.0
        lose_count = 0.0
        draw_count = 0.0
        for t in range(self._llc_steps):
            llc_ego_actions = self._compute_llc_action(obs, ego_actions)
            llc_op_actions = self._compute_llc_action(obs_op, op_actions)
            llc_actions = torch.cat((llc_ego_actions, llc_op_actions), dim=0)
            obs_all, curr_rewards, curr_dones, infos = env.step(llc_actions)
            rewards += curr_rewards
            done_count += curr_dones
            terminate_count += infos['terminate']
            win_count += infos['win']
            lose_count += infos['lose']
            draw_count += infos['draw']
            amp_obs = infos['amp_obs']
            curr_disc_reward = self._calc_disc_reward(amp_obs)
            # Keep only the first env's scalar disc reward here.
            curr_disc_reward = curr_disc_reward[0, 0].cpu().numpy()
            disc_rewards += curr_disc_reward
            obs = obs_all[:self.num_actors]
            obs_op = obs_all[self.num_actors:]
        rewards /= self._llc_steps
        disc_rewards /= self._llc_steps
        dones = torch.zeros_like(done_count)
        dones[done_count > 0] = 1.0
        terminate = torch.zeros_like(terminate_count)
        terminate[terminate_count > 0] = 1.0
        infos['terminate'] = terminate
        infos['disc_rewards'] = disc_rewards
        wins = torch.zeros_like(win_count)
        wins[win_count > 0] = 1.0
        infos['win'] = wins
        loses = torch.zeros_like(lose_count)
        loses[lose_count > 0] = 1.0
        infos['lose'] = loses
        draws = torch.zeros_like(draw_count)
        draws[draw_count > 0] = 1.0
        infos['draw'] = draws
        next_obs_dict = {}
        next_obs_dict['obs_op'] = obs_op
        next_obs_dict['obs'] = obs
        if self.value_size > 1:
            rewards = rewards[0]
        if self.is_tensor_obses:
            return self.obs_to_torch(next_obs_dict), rewards.cpu(), dones.cpu(), infos
        else:
            if np.isscalar(dones):
                rewards = np.expand_dims(np.asarray(rewards), 0)
                dones = np.expand_dims(np.asarray(dones), 0)
            return next_obs_dict, rewards, dones, infos

    def create_model(self):
        """Build a fresh policy network on the player device."""
        model = self.network.build(self.base_model_config)
        model.to(self.device)
        return model

    def load_model(self, fn):
        """Build a network and load weights (plus optional obs normalizer)
        from checkpoint `fn`."""
        model = self.create_model()
        checkpoint = load_checkpoint(fn, device=self.device)
        checkpoint = load_check(checkpoint, normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        model.load_state_dict(checkpoint['model'])
        if self.normalize_input and 'running_mean_std' in checkpoint:
            model.running_mean_std.load_state_dict(checkpoint['running_mean_std'])
        return model


================================================
FILE: timechamber/learning/pfsp_player_pool.py
================================================
import collections
import random
import torch
import torch.multiprocessing as mp
import dill
# import time
from rl_games.algos_torch import model_builder from concurrent.futures import ThreadPoolExecutor, as_completed, wait, ALL_COMPLETED def player_inference_thread(model, input_dict, res_dict, env_indices, processed_obs): if len(env_indices) == 0: return None input_dict['obs'] = processed_obs[env_indices] out_dict = model(input_dict) for key in res_dict: res_dict[key][env_indices] = out_dict[key] return out_dict def player_inference_process(pipe, queue, barrier): input_dict = { 'is_train': False, 'prev_actions': None, 'obs': None, 'rnn_states': None, } model = None barrier.wait() while True: msg = pipe.recv() task = msg['task'] if task == 'init': if model is not None: del model model = queue.get() model = dill.loads(model) barrier.wait() elif task == 'forward': obs, actions, values, env_indices = queue.get() input_dict['obs'] = obs[env_indices] out_dict = model(input_dict) actions[env_indices] = out_dict['actions'] values[env_indices] = out_dict['values'] barrier.wait() del obs, actions, values, env_indices elif task == 'terminate': break else: barrier.wait() class SinglePlayer: def __init__(self, player_idx, model, device, obs_batch_len=0, rating=None): self.model = model if model: self.model.eval() self.player_idx = player_idx self._games = torch.tensor(0, device=device, dtype=torch.float) self._wins = torch.tensor(0, device=device, dtype=torch.float) self._loses = torch.tensor(0, device=device, dtype=torch.float) self._draws = torch.tensor(0, device=device, dtype=torch.float) self._decay = 0.998 self._has_env = torch.zeros((obs_batch_len,), device=device, dtype=torch.bool) self.device = device self.env_indices = torch.tensor([], device=device, dtype=torch.long, requires_grad=False) if rating: self.rating = rating def __call__(self, input_dict): return self.model(input_dict) def reset_envs(self): self.env_indices = self._has_env.nonzero(as_tuple=True) def remove_envs(self, env_indices): self._has_env[env_indices] = False def add_envs(self, env_indices): 
self._has_env[env_indices] = True def clear_envs(self): self.env_indices = torch.tensor([], device=self.device, dtype=torch.long, requires_grad=False) def update_metric(self, wins, loses, draws): win_count = torch.sum(wins[self.env_indices]) lose_count = torch.sum(loses[self.env_indices]) draw_count = torch.sum(draws[self.env_indices]) for stats in (self._games, self._wins, self._loses, self._draws): stats *= self._decay self._games += win_count + lose_count + draw_count self._wins += win_count self._loses += lose_count self._draws += draw_count def clear_metric(self): self._games = torch.tensor(0, device=self.device, dtype=torch.float) self._wins = torch.tensor(0, device=self.device, dtype=torch.float) self._loses = torch.tensor(0, device=self.device, dtype=torch.float) self._draws = torch.tensor(0, device=self.device, dtype=torch.float) def win_rate(self): if self.model is None: return 0 elif self._games == 0: return 0.5 return (self._wins + 0.5 * self._draws) / self._games def games_num(self): return self._games class PFSPPlayerPool: def __init__(self, max_length, device): assert max_length > 0 self.players = [] self.max_length = max_length self.idx = 0 self.device = device self.weightings = { "variance": lambda x: x * (1 - x), "linear": lambda x: 1 - x, "squared": lambda x: (1 - x) ** 2, } def add_player(self, player): if len(self.players) < self.max_length: self.players.append(player) else: self.players[self.idx] = player self.idx += 1 self.idx %= self.max_length def sample_player(self, weight='linear'): weight_func = self.weightings[weight] player = \ random.choices(self.players, weights=[weight_func(player.win_rate()) for player in self.players])[0] return player def update_player_metric(self, infos): for player in self.players: player.update_metric(infos['win'], infos['lose'], infos['draw']) def clear_player_metric(self): for player in self.players: player.clear_metric() def inference(self, input_dict, res_dict, processed_obs): for i, player in 
enumerate(self.players): if len(player.env_indices[0]) == 0: continue input_dict['obs'] = processed_obs[player.env_indices] out_dict = player(input_dict) for key in res_dict: res_dict[key][player.env_indices] = out_dict[key] class PFSPPlayerVectorizedPool(PFSPPlayerPool): def __init__(self, max_length, device, vector_model_config, params): super(PFSPPlayerVectorizedPool, self).__init__(max_length, device) params['model']['name'] = 'vectorized_a2c' params['network']['name'] = 'vectorized_a2c' builder = model_builder.ModelBuilder() self.vectorized_network = builder.load(params) self.vectorized_model = self.vectorized_network.build(vector_model_config) self.vectorized_model.to(self.device) self.vectorized_model.eval() self.obs = torch.zeros( (self.max_length, vector_model_config["num_envs"], vector_model_config['input_shape'][0]), dtype=torch.float32, device=self.device) for idx in range(max_length): self.add_player(SinglePlayer(idx, None, self.device, vector_model_config["num_envs"])) def inference(self, input_dict, res_dict, processed_obs): for i, player in enumerate(self.players): self.obs[i][player.env_indices] = processed_obs[player.env_indices] input_dict['obs'] = self.obs out_dict = self.vectorized_model(input_dict) for i, player in enumerate(self.players): if len(player.env_indices) == 0: continue for key in res_dict: res_dict[key][player.env_indices] = out_dict[key][i][player.env_indices] def add_player(self, player): if player.model: self.vectorized_model.update(self.idx, player.model) super().add_player(player) class PFSPPlayerThreadPool(PFSPPlayerPool): def __init__(self, max_length, device): super().__init__(max_length, device) self.thread_pool = ThreadPoolExecutor(max_workers=self.max_length) def inference(self, input_dict, res_dict, processed_obs): self.thread_pool.map(player_inference_thread, [player.model for player in self.players], [input_dict for _ in range(len(self.players))], [res_dict for _ in range(len(self.players))], [player.env_indices for 
player in self.players], [processed_obs for _ in range(len(self.players))]) class PFSPPlayerProcessPool(PFSPPlayerPool): def __init__(self, max_length, device): super(PFSPPlayerProcessPool, self).__init__(max_length, device) self.inference_processes = [] self.queues = [] self.producer_pipes = [] self.consumer_pipes = [] self.barrier = mp.Barrier(self.max_length + 1) mp.set_start_method(method='spawn', force=True) self._init_inference_processes() def _init_inference_processes(self): for _ in range(self.max_length): queue = mp.Queue() self.queues.append(queue) pipe_read, pipe_write = mp.Pipe(duplex=False) self.producer_pipes.append(pipe_write) self.consumer_pipes.append(pipe_read) process = mp.Process(target=player_inference_process, args=(pipe_read, queue, self.barrier), daemon=True) self.inference_processes.append(process) process.start() self.barrier.wait() def add_player(self, player): with torch.no_grad(): model = dill.dumps(player.model) for i in range(self.max_length): if i == self.idx: self.producer_pipes[i].send({'task': 'init'}) self.queues[i].put(model) else: self.producer_pipes[i].send({'task': 'continue'}) self.barrier.wait() if len(self.players) < self.max_length: self.players.append(player) else: self.players[self.idx] = player self.idx += 1 self.idx %= self.max_length def inference(self, input_dict, res_dict, processed_obs): for i in range(self.max_length): if i < len(self.players) and len(self.players[i].env_indices): self.producer_pipes[i].send({'task': 'forward'}) self.queues[i].put( (processed_obs, res_dict['actions'], res_dict['values'], self.players[i].env_indices)) else: self.producer_pipes[i].send({'task': 'continue'}) def __del__(self): for pipe in self.producer_pipes: pipe.send({'task': 'terminate'}) for process in self.inference_processes: process.join() ================================================ FILE: timechamber/learning/ppo_sp_agent.py ================================================ # License: see [LICENSE, 
LICENSES/isaacgymenvs/LICENSE] import copy from datetime import datetime from gym import spaces import numpy as np import os import time from .pfsp_player_pool import PFSPPlayerPool, SinglePlayer, PFSPPlayerThreadPool, PFSPPlayerProcessPool, \ PFSPPlayerVectorizedPool from timechamber.utils.utils import load_checkpoint from rl_games.algos_torch import a2c_continuous from rl_games.common.a2c_common import swap_and_flatten01 from rl_games.algos_torch import torch_ext from rl_games.algos_torch import central_value import torch from torch import optim from tensorboardX import SummaryWriter import torch.distributed as dist class SPAgent(a2c_continuous.A2CAgent): def __init__(self, base_name, params): params['config']['device'] = params['device'] super().__init__(base_name, params) self.player_pool_type = params['player_pool_type'] self.base_model_config = { 'actions_num': self.actions_num, 'input_shape': self.obs_shape, 'num_seqs': self.num_agents, 'value_size': self.env_info.get('value_size', 1), 'normalize_value': self.normalize_value, 'normalize_input': self.normalize_input, } self.max_his_player_num = params['player_pool_length'] if params['op_load_path']: self.init_op_model = self.create_model() self.restore_op(params['op_load_path']) else: self.init_op_model = self.model self.players_dir = os.path.join(self.experiment_dir, 'policy_dir') os.makedirs(self.players_dir, exist_ok=True) self.update_win_rate = params['update_win_rate'] self.num_opponent_agents = params['num_agents'] - 1 self.player_pool = self._build_player_pool(params) self.games_to_check = params['games_to_check'] self.now_update_steps = 0 self.max_update_steps = params['max_update_steps'] self.update_op_num = 0 self.update_player_pool(self.init_op_model, player_idx=self.update_op_num) self.resample_op(torch.arange(end=self.num_actors, device=self.device, dtype=torch.long)) assert self.num_actors % self.max_his_player_num == 0 def _build_player_pool(self, params): if self.player_pool_type == 
'multi_thread': return PFSPPlayerProcessPool(max_length=self.max_his_player_num, device=self.device) elif self.player_pool_type == 'multi_process': return PFSPPlayerThreadPool(max_length=self.max_his_player_num, device=self.device) elif self.player_pool_type == 'vectorized': vector_model_config = self.base_model_config vector_model_config['num_envs'] = self.num_actors * self.num_opponent_agents vector_model_config['population_size'] = self.max_his_player_num return PFSPPlayerVectorizedPool(max_length=self.max_his_player_num, device=self.device, vector_model_config=vector_model_config, params=params) else: return PFSPPlayerPool(max_length=self.max_his_player_num, device=self.device) def play_steps(self): update_list = self.update_list step_time = 0.0 env_done_indices = torch.tensor([], device=self.device, dtype=torch.long) for n in range(self.horizon_length): self.obs = self.env_reset(env_done_indices) if self.use_action_masks: masks = self.vec_env.get_action_masks() res_dict = self.get_masked_action_values(self.obs, masks) else: res_dict_op = self.get_action_values(self.obs, is_op=True) res_dict = self.get_action_values(self.obs) self.experience_buffer.update_data('obses', n, self.obs['obs']) self.experience_buffer.update_data('dones', n, self.dones) for k in update_list: self.experience_buffer.update_data(k, n, res_dict[k]) if self.has_central_value: self.experience_buffer.update_data('states', n, self.obs['states']) if self.player_pool_type == 'multi_thread': self.player_pool.thread_pool.shutdown() step_time_start = time.time() self.obs, rewards, self.dones, infos = self.env_step( torch.cat((res_dict['actions'], res_dict_op['actions']), dim=0)) step_time_end = time.time() step_time += (step_time_end - step_time_start) shaped_rewards = self.rewards_shaper(rewards) if self.value_bootstrap and 'time_outs' in infos: shaped_rewards += self.gamma * res_dict['values'] * self.cast_obs(infos['time_outs']).unsqueeze( 1).float() self.experience_buffer.update_data('rewards', 
n, shaped_rewards) self.current_rewards += rewards self.current_lengths += 1 all_done_indices = self.dones.nonzero(as_tuple=False) env_done_indices = self.dones.view(self.num_actors, self.num_agents).all(dim=1).nonzero(as_tuple=False) # print(f"env done indices: {env_done_indices}") # print(f"self.dones {self.dones}") self.game_rewards.update(self.current_rewards[env_done_indices]) self.game_lengths.update(self.current_lengths[env_done_indices]) self.algo_observer.process_infos(infos, env_done_indices) not_dones = 1.0 - self.dones.float() self.current_rewards = self.current_rewards * not_dones.unsqueeze(1) self.current_lengths = self.current_lengths * not_dones self.player_pool.update_player_metric(infos=infos) self.resample_op(all_done_indices.flatten()) env_done_indices = env_done_indices[:, 0] last_values = self.get_values(self.obs) fdones = self.dones.float() mb_fdones = self.experience_buffer.tensor_dict['dones'].float() mb_values = self.experience_buffer.tensor_dict['values'] mb_rewards = self.experience_buffer.tensor_dict['rewards'] mb_advs = self.discount_values(fdones, last_values, mb_fdones, mb_values, mb_rewards) mb_returns = mb_advs + mb_values batch_dict = self.experience_buffer.get_transformed_list(swap_and_flatten01, self.tensor_list) batch_dict['returns'] = swap_and_flatten01(mb_returns) batch_dict['played_frames'] = self.batch_size batch_dict['step_time'] = step_time return batch_dict def env_step(self, actions): actions = self.preprocess_actions(actions) obs, rewards, dones, infos = self.vec_env.step(actions) obs['obs_op'] = obs['obs'][self.num_actors:] obs['obs'] = obs['obs'][:self.num_actors] if self.is_tensor_obses: if self.value_size == 1: rewards = rewards.unsqueeze(1) return self.obs_to_tensors(obs), rewards.to(self.ppo_device), dones.to(self.ppo_device), infos else: if self.value_size == 1: rewards = np.expand_dims(rewards, axis=1) return self.obs_to_tensors(obs), torch.from_numpy(rewards).to(self.ppo_device).float(), torch.from_numpy( 
dones).to(self.ppo_device), infos def env_reset(self, env_ids=None): obs = self.vec_env.reset(env_ids) obs = self.obs_to_tensors(obs) obs['obs_op'] = obs['obs'][self.num_actors:] obs['obs'] = obs['obs'][:self.num_actors] return obs def train(self): self.init_tensors() self.mean_rewards = self.last_mean_rewards = -100500 start_time = time.time() total_time = 0 rep_count = 0 # self.frame = 0 # loading from checkpoint self.obs = self.env_reset() if self.multi_gpu: torch.cuda.set_device(self.rank) print("====================broadcasting parameters") model_params = [self.model.state_dict()] dist.broadcast_object_list(model_params, 0) self.model.load_state_dict(model_params[0]) while True: epoch_num = self.update_epoch() step_time, play_time, update_time, sum_time, a_losses, c_losses, b_losses, entropies, kls, last_lr, lr_mul = self.train_epoch() # cleaning memory to optimize space self.dataset.update_values_dict(None) total_time += sum_time curr_frames = self.curr_frames * self.rank_size if self.multi_gpu else self.curr_frames self.frame += curr_frames should_exit = False if self.rank == 0: self.diagnostics.epoch(self, current_epoch=epoch_num) scaled_time = self.num_agents * sum_time scaled_play_time = self.num_agents * play_time frame = self.frame // self.num_agents if self.print_stats: step_time = max(step_time, 1e-6) fps_step = curr_frames / step_time fps_step_inference = curr_frames / scaled_play_time fps_total = curr_frames / scaled_time print( f'fps step: {fps_step:.0f} fps step and policy inference: {fps_step_inference:.0f} fps total: {fps_total:.0f} epoch: {epoch_num}/{self.max_epochs}') self.write_stats(total_time, epoch_num, step_time, play_time, update_time, a_losses, c_losses, entropies, kls, last_lr, lr_mul, frame, scaled_time, scaled_play_time, curr_frames) self.algo_observer.after_print_stats(frame, epoch_num, total_time) if self.game_rewards.current_size > 0: mean_rewards = self.game_rewards.get_mean() mean_lengths = self.game_lengths.get_mean() 
self.mean_rewards = mean_rewards[0] for i in range(self.value_size): rewards_name = 'rewards' if i == 0 else 'rewards{0}'.format(i) self.writer.add_scalar(rewards_name + '/step'.format(i), mean_rewards[i], frame) self.writer.add_scalar(rewards_name + '/iter'.format(i), mean_rewards[i], epoch_num) self.writer.add_scalar(rewards_name + '/time'.format(i), mean_rewards[i], total_time) self.writer.add_scalar('episode_lengths/step', mean_lengths, frame) self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num) self.writer.add_scalar('episode_lengths/time', mean_lengths, total_time) # removed equal signs (i.e. "rew=") from the checkpoint name since it messes with hydra CLI parsing checkpoint_name = self.config['name'] + '_ep_' + str(epoch_num) + '_rew_' + str(mean_rewards[0]) if self.save_freq > 0: if (epoch_num % self.save_freq == 0) and (mean_rewards <= self.last_mean_rewards): self.save(os.path.join(self.nn_dir, 'last_' + checkpoint_name)) if mean_rewards[0] > self.last_mean_rewards and epoch_num >= self.save_best_after: print('saving next best rewards: ', mean_rewards) self.last_mean_rewards = mean_rewards[0] self.save(os.path.join(self.nn_dir, self.config['name'])) if 'score_to_win' in self.config: if self.last_mean_rewards > self.config['score_to_win']: print('Network won!') self.save(os.path.join(self.nn_dir, checkpoint_name)) should_exit = True if epoch_num >= self.max_epochs: if self.game_rewards.current_size == 0: print('WARNING: Max epochs reached before any env terminated at least once') mean_rewards = -np.inf self.save(os.path.join(self.nn_dir, 'last_' + self.config['name'] + 'ep' + str(epoch_num) + 'rew' + str( mean_rewards))) print('MAX EPOCHS NUM!') should_exit = True self.update_metric() update_time = 0 if self.multi_gpu: should_exit_t = torch.tensor(should_exit, device=self.device).float() dist.broadcast(should_exit_t, 0) should_exit = should_exit_t.bool().item() if should_exit: return self.last_mean_rewards, epoch_num def 
update_metric(self): tot_win_rate = 0 tot_games_num = 0 self.now_update_steps += 1 # self_player process for player in self.player_pool.players: win_rate = player.win_rate() games = player.games_num() self.writer.add_scalar(f'rate/win_rate_player_{player.player_idx}', win_rate, self.epoch_num) tot_win_rate += win_rate * games tot_games_num += games win_rate = tot_win_rate / tot_games_num if tot_games_num > self.games_to_check: self.check_update_opponent(win_rate) self.writer.add_scalar('rate/win_rate', win_rate, self.epoch_num) def get_action_values(self, obs, is_op=False): processed_obs = self._preproc_obs(obs['obs_op'] if is_op else obs['obs']) if not is_op: self.model.eval() input_dict = { 'is_train': False, 'prev_actions': None, 'obs': processed_obs, 'rnn_states': self.rnn_states } with torch.no_grad(): if is_op: res_dict = { "actions": torch.zeros((self.num_actors * self.num_opponent_agents, self.actions_num), device=self.device), "values": torch.zeros((self.num_actors * self.num_opponent_agents, 1), device=self.device) } self.player_pool.inference(input_dict, res_dict, processed_obs) else: res_dict = self.model(input_dict) if self.has_central_value: states = obs['states'] input_dict = { 'is_train': False, 'states': states, } value = self.get_central_value(input_dict) res_dict['values'] = value return res_dict def resample_op(self, resample_indices): for op_idx in range(self.num_opponent_agents): for player in self.player_pool.players: player.remove_envs(resample_indices + op_idx * self.num_actors) for op_idx in range(self.num_opponent_agents): for env_idx in resample_indices: player = self.player_pool.sample_player() player.add_envs(env_idx + op_idx * self.num_actors) for player in self.player_pool.players: player.reset_envs() def resample_batch(self): env_indices = torch.arange(end=self.num_actors * self.num_opponent_agents, device=self.device, dtype=torch.long, requires_grad=False) step = self.num_actors // 32 for player in self.player_pool.players: 
player.clear_envs() for i in range(0, self.num_actors, step): player = self.player_pool.sample_player() player.add_envs(env_indices[i:i + step]) print("resample done") def restore_op(self, fn): checkpoint = load_checkpoint(fn, device=self.device) self.init_op_model.load_state_dict(checkpoint['model']) if self.normalize_input and 'running_mean_std' in checkpoint: self.init_op_model.running_mean_std.load_state_dict(checkpoint['running_mean_std']) def check_update_opponent(self, win_rate): if win_rate > self.update_win_rate or self.now_update_steps > self.max_update_steps: print(f'winrate:{win_rate},add opponent to player pool') self.update_op_num += 1 self.now_update_steps = 0 self.update_player_pool(self.model, player_idx=self.update_op_num) self.player_pool.clear_player_metric() self.resample_op(torch.arange(end=self.num_actors, device=self.device, dtype=torch.long)) self.save(os.path.join(self.players_dir, f'policy_{self.update_op_num}')) def create_model(self): model = self.network.build(self.base_model_config) model.to(self.device) return model def update_player_pool(self, model, player_idx): new_model = self.create_model() new_model.load_state_dict(copy.deepcopy(model.state_dict())) if hasattr(model, 'running_mean_std'): new_model.running_mean_std.load_state_dict(copy.deepcopy(model.running_mean_std.state_dict())) player = SinglePlayer(player_idx, new_model, self.device, self.num_actors * self.num_opponent_agents) self.player_pool.add_player(player) ================================================ FILE: timechamber/learning/ppo_sp_player.py ================================================ # License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE] import os import time import torch import numpy as np from rl_games.algos_torch import players import random from rl_games.algos_torch import torch_ext from rl_games.common.tr_helpers import unsqueeze_obs from rl_games.common.player import BasePlayer from .pfsp_player_pool import PFSPPlayerPool, PFSPPlayerVectorizedPool, 
PFSPPlayerThreadPool, PFSPPlayerProcessPool, \ SinglePlayer import matplotlib.pyplot as plt from multielo import MultiElo def rescale_actions(low, high, action): d = (high - low) / 2.0 m = (high + low) / 2.0 scaled_action = action * d + m return scaled_action class SPPlayer(BasePlayer): def __init__(self, params): params['config']['device_name'] = params['device'] super().__init__(params) print(f'params:{params}') self.network = self.config['network'] self.actions_num = self.action_space.shape[0] self.actions_low = torch.from_numpy(self.action_space.low.copy()).float().to(self.device) self.actions_high = torch.from_numpy(self.action_space.high.copy()).float().to(self.device) self.mask = [False] self.is_rnn = False self.normalize_input = self.config['normalize_input'] self.normalize_value = self.config.get('normalize_value', False) self.base_model_config = { 'actions_num': self.actions_num, 'input_shape': self.obs_shape, 'num_seqs': self.num_agents, 'value_size': self.env_info.get('value_size', 1), 'normalize_value': self.normalize_value, 'normalize_input': self.normalize_input, } self.policy_timestep = [] self.policy_op_timestep = [] self.params = params self.record_elo = self.player_config.get('record_elo', False) self.init_elo = self.player_config.get('init_elo', 400) self.num_actors = params['config']['num_actors'] self.player_pool_type = params['player_pool_type'] self.player_pool = None self.op_player_pool = None self.num_opponents = params['num_agents'] - 1 self.max_steps = 1000 self.update_op_num = 0 self.players_per_env = [] self.elo = MultiElo() def restore(self, load_dir): if os.path.isdir(load_dir): self.player_pool = self._build_player_pool(params=self.params, player_num=len(os.listdir(load_dir))) print('dir:', load_dir) sorted_players = [] for idx, policy_check_checkpoint in enumerate(os.listdir(load_dir)): model_timestep = os.path.getmtime(load_dir + '/' + str(policy_check_checkpoint)) self.policy_timestep.append(model_timestep) model = 
self.load_model(load_dir + '/' + str(policy_check_checkpoint)) new_player = SinglePlayer(player_idx=model_timestep, model=model, device=self.device, rating=self.init_elo, obs_batch_len=self.num_actors * self.num_opponents) sorted_players.append(new_player) sorted_players.sort(key=lambda player: player.player_idx) for idx, player in enumerate(sorted_players): player.player_idx = idx self.player_pool.add_player(player) self.policy_timestep.sort() else: self.player_pool = self._build_player_pool(params=self.params, player_num=1) self.policy_timestep.append(os.path.getmtime(load_dir)) model = self.load_model(load_dir) new_player = SinglePlayer(player_idx=0, model=model, device=self.device, rating=self.init_elo, obs_batch_len=self.num_actors * self.num_opponents) self.player_pool.add_player(new_player) self.restore_op(self.params['op_load_path']) self._norm_policy_timestep() self._alloc_env_indices() def restore_op(self, load_dir): if os.path.isdir(load_dir): self.op_player_pool = self._build_player_pool(params=self.params, player_num=len(os.listdir(load_dir))) sorted_players = [] for idx, policy_check_checkpoint in enumerate(os.listdir(load_dir)): model_timestep = os.path.getmtime(load_dir + '/' + str(policy_check_checkpoint)) self.policy_op_timestep.append(model_timestep) model = self.load_model(load_dir + '/' + str(policy_check_checkpoint)) new_player = SinglePlayer(player_idx=model_timestep, model=model, device=self.device, rating=self.init_elo, obs_batch_len=self.num_actors * self.num_opponents) sorted_players.append(new_player) sorted_players.sort(key=lambda player: player.player_idx) for idx, player in enumerate(sorted_players): player.player_idx = idx self.op_player_pool.add_player(player) self.policy_op_timestep.sort() else: self.op_player_pool = self._build_player_pool(params=self.params, player_num=1) self.policy_op_timestep.append(os.path.getmtime(load_dir)) model = self.load_model(load_dir) new_player = SinglePlayer(player_idx=0, model=model, 
device=self.device, rating=400, obs_batch_len=self.num_actors * self.num_opponents) self.op_player_pool.add_player(new_player) def _alloc_env_indices(self): for idx in range(self.num_actors): player_idx = random.randint(0, len(self.player_pool.players) - 1) self.player_pool.players[player_idx].add_envs(torch.tensor([idx], dtype=torch.long, device=self.device)) env_player = [self.player_pool.players[player_idx]] for op_idx in range(self.num_opponents): op_player_idx = random.randint(0, len(self.op_player_pool.players) - 1) self.op_player_pool.players[op_player_idx].add_envs( torch.tensor([idx + op_idx * self.num_actors], dtype=torch.long, device=self.device)) env_player.append(self.op_player_pool.players[op_player_idx]) self.players_per_env.append(env_player) for player in self.player_pool.players: player.reset_envs() for player in self.op_player_pool.players: player.reset_envs() def _build_player_pool(self, params, player_num): if self.player_pool_type == 'multi_thread': return PFSPPlayerProcessPool(max_length=player_num, device=self.device) elif self.player_pool_type == 'multi_process': return PFSPPlayerThreadPool(max_length=player_num, device=self.device) elif self.player_pool_type == 'vectorized': vector_model_config = self.base_model_config vector_model_config['num_envs'] = self.num_actors * self.num_opponents vector_model_config['population_size'] = player_num return PFSPPlayerVectorizedPool(max_length=player_num, device=self.device, vector_model_config=vector_model_config, params=params) else: return PFSPPlayerPool(max_length=player_num, device=self.device) def _update_rating(self, info, env_indices): for env_idx in env_indices: if self.num_opponents == 1: player = self.players_per_env[env_idx][0] op_player = self.players_per_env[env_idx][1] if info['win'][env_idx]: player.rating, op_player.rating = self.elo.get_new_ratings([player.rating, op_player.rating]) elif info['lose'][env_idx]: op_player.rating, player.rating = 
self.elo.get_new_ratings([op_player.rating, player.rating]) elif info['draw'][env_idx]: player.rating, op_player.rating = self.elo.get_new_ratings([player.rating, op_player.rating], result_order=[1, 1]) else: ranks = info['ranks'][env_idx].cpu().numpy() players_sorted_by_rank = sorted(enumerate(self.players_per_env[env_idx]), key=lambda x: ranks[x[0]]) sorted_ranks = sorted(ranks) now_ratings = [player.rating for idx, player in players_sorted_by_rank] new_ratings = self.elo.get_new_ratings(now_ratings, result_order=sorted_ranks) # print(now_ratings, new_ratings) # assert new_ratings[0] > 0 and new_ratings[1] > 0 and new_ratings[2] > 0 for idx, new_rating in enumerate(new_ratings): players_sorted_by_rank[idx][1].rating = new_rating def run(self): n_games = self.games_num render = self.render_env n_game_life = self.n_game_life is_determenistic = self.is_determenistic sum_rewards = 0 sum_steps = 0 sum_game_res = 0 n_games = n_games * n_game_life games_played = 0 has_masks = False has_masks_func = getattr(self.env, "has_action_mask", None) is not None if has_masks_func: has_masks = self.env.has_action_mask() print(f'games_num:{n_games}') need_init_rnn = self.is_rnn for _ in range(n_games): if games_played >= n_games: break obses = self.env_reset(self.env) batch_size = 1 batch_size = self.get_batch_size(obses['obs'], batch_size) if need_init_rnn: self.init_rnn() need_init_rnn = False cr = torch.zeros(batch_size, dtype=torch.float32, device=self.device) steps = torch.zeros(batch_size, dtype=torch.float32, device=self.device) print_game_res = False done_indices = torch.tensor([], device=self.device, dtype=torch.long) for n in range(self.max_steps): obses = self.env_reset(self.env, done_indices) if has_masks: masks = self.env.get_action_mask() action = self.get_masked_action( obses, masks, is_determenistic) else: action = self.get_action(obses['obs'], is_determenistic) action_op = self.get_action(obses['obs_op'], is_determenistic, is_op=True) obses, r, done, info = 
self.env_step(self.env, torch.cat((action, action_op), dim=0)) cr += r steps += 1 if render: self.env.render(mode='human') time.sleep(self.render_sleep) all_done_indices = done.nonzero(as_tuple=False) done_indices = all_done_indices[::self.num_agents] done_count = len(done_indices) games_played += done_count if self.record_elo: self._update_rating(info, all_done_indices.flatten()) if done_count > 0: if self.is_rnn: for s in self.states: s[:, all_done_indices, :] = s[:, all_done_indices, :] * 0.0 cur_rewards = cr[done_indices].sum().item() cur_steps = steps[done_indices].sum().item() cr = cr * (1.0 - done.float()) steps = steps * (1.0 - done.float()) sum_rewards += cur_rewards sum_steps += cur_steps game_res = 0.0 if isinstance(info, dict): if 'battle_won' in info: print_game_res = True game_res = info.get('battle_won', 0.5) if 'scores' in info: print_game_res = True game_res = info.get('scores', 0.5) if self.print_stats: if print_game_res: print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count, 'w:', game_res) else: print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count) sum_game_res += game_res if batch_size // self.num_agents == 1 or games_played >= n_games: print(f"games_player: {games_played}") break done_indices = done_indices[:, 0] if self.record_elo: self._plot_elo_curve() def _plot_elo_curve(self): x = np.array(self.policy_timestep) y = np.arange(len(self.player_pool.players)) x_op = np.array(self.policy_op_timestep) y_op = np.arange(len(self.op_player_pool.players)) for player in self.player_pool.players: idx = player.player_idx # print(player.player_idx, player.rating) y[idx] = player.rating for player in self.op_player_pool.players: idx = player.player_idx # print(player.player_idx, player.rating) y_op[idx] = player.rating if self.params['load_path'] != self.params['op_load_path']: l1 = plt.plot(x, y, 'b--', label='policy') l2 = plt.plot(x_op, y_op, 'r--', label='policy_op') plt.plot(x, y, 'b^-', x_op, y_op, 
'ro-') else: l1 = plt.plot(x, y, 'b--', label='policy') plt.plot(x, y, 'b^-') plt.title('ELO Curve') plt.xlabel('timestep/days') plt.ylabel('ElO') plt.legend() parent_path = os.path.dirname(self.params['load_path']) plt.savefig(os.path.join(parent_path, 'elo.jpg')) def get_action(self, obs, is_determenistic=False, is_op=False): if self.has_batch_dimension == False: obs = unsqueeze_obs(obs) obs = self._preproc_obs(obs) input_dict = { 'is_train': False, 'prev_actions': None, 'obs': obs, 'rnn_states': self.states } with torch.no_grad(): data_len = self.num_actors * self.num_opponents if is_op else self.num_actors res_dict = { "actions": torch.zeros((data_len, self.actions_num), device=self.device), "values": torch.zeros((data_len, 1), device=self.device), "mus": torch.zeros((data_len, self.actions_num), device=self.device) } if is_op: self.op_player_pool.inference(input_dict, res_dict, obs) else: self.player_pool.inference(input_dict, res_dict, obs) mu = res_dict['mus'] action = res_dict['actions'] # self.states = res_dict['rnn_states'] if is_determenistic: current_action = mu else: current_action = action if self.has_batch_dimension == False: current_action = torch.squeeze(current_action.detach()) if self.clip_actions: return rescale_actions(self.actions_low, self.actions_high, torch.clamp(current_action, -1.0, 1.0)) else: return current_action def _norm_policy_timestep(self): self.policy_op_timestep.sort() self.policy_timestep.sort() for idx in range(1, len(self.policy_op_timestep)): self.policy_op_timestep[idx] -= self.policy_op_timestep[0] self.policy_op_timestep[idx] /= 3600 * 24 for idx in range(1, len(self.policy_timestep)): self.policy_timestep[idx] -= self.policy_timestep[0] self.policy_timestep[idx] /= 3600 * 24 self.policy_timestep[0] = 0 if len(self.policy_op_timestep): self.policy_op_timestep[0] = 0 def env_reset(self, env, done_indices=None): obs = env.reset(done_indices) obs_dict = {} obs_dict['obs_op'] = obs[self.num_actors:] obs_dict['obs'] = 
obs[:self.num_actors] return obs_dict def env_step(self, env, actions): obs, rewards, dones, infos = env.step(actions) if hasattr(obs, 'dtype') and obs.dtype == np.float64: obs = np.float32(obs) obs_dict = {} obs_dict['obs_op'] = obs[self.num_actors:] obs_dict['obs'] = obs[:self.num_actors] if self.value_size > 1: rewards = rewards[0] if self.is_tensor_obses: return self.obs_to_torch(obs_dict), rewards.cpu(), dones.cpu(), infos else: if np.isscalar(dones): rewards = np.expand_dims(np.asarray(rewards), 0) dones = np.expand_dims(np.asarray(dones), 0) return obs_dict, rewards, dones, infos def create_model(self): model = self.network.build(self.base_model_config) model.to(self.device) return model def load_model(self, fn): model = self.create_model() checkpoint = torch_ext.safe_filesystem_op(torch.load, fn, map_location=self.device) model.load_state_dict(checkpoint['model']) if self.normalize_input and 'running_mean_std' in checkpoint: model.running_mean_std.load_state_dict(checkpoint['running_mean_std']) return model ================================================ FILE: timechamber/learning/replay_buffer.py ================================================ # License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE] import torch class ReplayBuffer(): def __init__(self, buffer_size, device): self._head = 0 self._total_count = 0 self._buffer_size = buffer_size self._device = device self._data_buf = None self._sample_idx = torch.randperm(buffer_size) self._sample_head = 0 return def reset(self): self._head = 0 self._total_count = 0 self._reset_sample_idx() return def get_buffer_size(self): return self._buffer_size def get_total_count(self): return self._total_count def store(self, data_dict): if (self._data_buf is None): self._init_data_buf(data_dict) n = next(iter(data_dict.values())).shape[0] buffer_size = self.get_buffer_size() assert (n < buffer_size) for key, curr_buf in self._data_buf.items(): curr_n = data_dict[key].shape[0] assert (n == curr_n) store_n = min(curr_n, 
class ReplayBuffer():
    """Fixed-capacity ring buffer of keyed tensor batches with pre-shuffled,
    cursor-based sampling."""

    def __init__(self, buffer_size, device):
        self._head = 0                 # next write position in the ring
        self._total_count = 0          # total samples ever stored
        self._buffer_size = buffer_size
        self._device = device
        self._data_buf = None          # allocated lazily on first store()
        self._sample_idx = torch.randperm(buffer_size)  # shuffled read order
        self._sample_head = 0          # cursor into the shuffled order
        return

    def reset(self):
        """Empty the buffer and reshuffle the sampling order."""
        self._head = 0
        self._total_count = 0
        self._reset_sample_idx()
        return

    def get_buffer_size(self):
        return self._buffer_size

    def get_total_count(self):
        return self._total_count

    def store(self, data_dict):
        """Append one batch of equally-sized tensors, wrapping at capacity."""
        if self._data_buf is None:
            self._init_data_buf(data_dict)

        batch = next(iter(data_dict.values())).shape[0]
        capacity = self.get_buffer_size()
        assert batch < capacity

        for key, dst in self._data_buf.items():
            src = data_dict[key]
            assert batch == src.shape[0]
            # Contiguous part before the wrap point, then the wrapped remainder.
            first = min(src.shape[0], capacity - self._head)
            dst[self._head:(self._head + first)] = src[:first]
            wrapped = batch - first
            if wrapped > 0:
                dst[0:wrapped] = src[first:]

        self._head = (self._head + batch) % capacity
        self._total_count += batch
        return

    def sample(self, n):
        """Draw `n` samples following the pre-shuffled index order."""
        capacity = self.get_buffer_size()
        positions = torch.arange(self._sample_head, self._sample_head + n) % capacity
        rand_idx = self._sample_idx[positions]
        if self.get_total_count() < capacity:
            # Buffer not yet full: fold indices into the written region.
            rand_idx = rand_idx % self._head

        samples = {key: buf[rand_idx] for key, buf in self._data_buf.items()}

        self._sample_head += n
        if self._sample_head >= capacity:
            self._reset_sample_idx()
        return samples

    def _reset_sample_idx(self):
        """Reshuffle the sampling order in place and rewind the cursor."""
        self._sample_idx[:] = torch.randperm(self.get_buffer_size())
        self._sample_head = 0
        return

    def _init_data_buf(self, data_dict):
        """Allocate zeroed per-key storage shaped (capacity, *sample_shape)."""
        capacity = self.get_buffer_size()
        self._data_buf = {
            key: torch.zeros((capacity,) + value.shape[1:], device=self._device)
            for key, value in data_dict.items()
        }
        return
self.axis = [1, 3, 4] if len(self.insize) == 2: self.axis = [1, 3] if len(self.insize) == 1: self.axis = [1] in_size = self.insize[1] else: self.axis = [1] in_size = insize # print(in_size) self.register_buffer("running_mean", torch.zeros((population_size, *in_size), dtype=torch.float32)) self.register_buffer("running_var", torch.ones((population_size, *in_size), dtype=torch.float32)) self.register_buffer("count", torch.ones((population_size, 1), dtype=torch.float32)) def _update_mean_var_count_from_moments(self, mean, var, count, batch_mean, batch_var, batch_count): delta = batch_mean - mean tot_count = count + batch_count new_mean = mean + delta * batch_count / tot_count m_a = var * count m_b = batch_var * batch_count M2 = m_a + m_b + delta ** 2 * count * batch_count / tot_count new_var = M2 / tot_count new_count = tot_count return new_mean, new_var, new_count def forward(self, input, unnorm=False, mask=None): if self.training: if mask is not None: mean, var = torch_ext.get_mean_std_with_masks(input, mask) else: mean = input.mean(self.axis) # along channel axis var = input.var(self.axis) self.running_mean, self.running_var, self.count = self._update_mean_var_count_from_moments( self.running_mean, self.running_var, self.count, mean, var, input.size()[1]) # change shape if self.per_channel: if len(self.insize) == 3: current_mean = self.running_mean.view([self.population_size, 1, self.insize[0], 1, 1]).expand_as(input) current_var = self.running_var.view([self.population_size, 1, self.insize[0], 1, 1]).expand_as(input) if len(self.insize) == 2: current_mean = self.running_mean.view([self.population_size, 1, self.insize[0], 1]).expand_as(input) current_var = self.running_var.view([self.population_size, 1, self.insize[0], 1]).expand_as(input) if len(self.insize) == 1: current_mean = self.running_mean.view([self.population_size, 1, self.insize[0]]).expand_as(input) current_var = self.running_var.view([self.population_size, 1, self.insize[0]]).expand_as(input) else: 
current_mean = self.running_mean current_var = self.running_var # get output if unnorm: y = torch.clamp(input, min=-5.0, max=5.0) y = torch.sqrt(torch.unsqueeze(current_var.float(), 1) + self.epsilon) * y + torch.unsqueeze( current_mean.float(), 1) else: if self.norm_only: y = input / torch.sqrt(current_var.float() + self.epsilon) else: y = (input - torch.unsqueeze(current_mean.float(), 1)) / torch.sqrt( torch.unsqueeze(current_var.float(), 1) + self.epsilon) y = torch.clamp(y, min=-5.0, max=5.0) return y class ModelVectorizedA2C(ModelA2CContinuousLogStd): def __init__(self, network): super().__init__(network) return def build(self, config): net = self.network_builder.build('vectorized_a2c', **config) for name, _ in net.named_parameters(): print(name) obs_shape = config['input_shape'] population_size = config['population_size'] normalize_value = config.get('normalize_value', False) normalize_input = config.get('normalize_input', False) value_size = config.get('value_size', 1) return self.Network(net, population_size, obs_shape=obs_shape, normalize_value=normalize_value, normalize_input=normalize_input, value_size=value_size, ) class Network(ModelA2CContinuousLogStd.Network): def __init__(self, a2c_network, population_size, obs_shape, normalize_value, normalize_input, value_size): self.population_size = population_size super().__init__(a2c_network, obs_shape=obs_shape, normalize_value=normalize_value, normalize_input=normalize_input, value_size=value_size) if normalize_value: self.value_mean_std = VectorizedRunningMeanStd((self.value_size,), self.population_size) if normalize_input: if isinstance(obs_shape, dict): self.running_mean_std = RunningMeanStdObs(obs_shape) else: self.running_mean_std = VectorizedRunningMeanStd(obs_shape, self.population_size) def update(self, population_idx, network): for key in self.state_dict(): param1 = self.state_dict()[key] param2 = network.state_dict()[key] if len(param1.shape) == len(param2.shape): self.state_dict()[key] = param2 
class VectorizedLinearLayer(torch.nn.Module):
    """Vectorized version of torch.nn.Linear.

    Holds one independent linear layer per population member:
    weight is (population, in_features, out_features), bias is
    (population, 1, out_features); input x is (population, batch, in_features)
    and the output is x @ weight + bias, shaped (population, batch, out_features).
    """

    def __init__(
        self,
        population_size: int,
        in_features: int,
        out_features: int,
        use_layer_norm: bool = False,
    ):
        super().__init__()
        self._population_size = population_size
        self._in_features = in_features
        self._out_features = out_features

        self.weight = torch.nn.Parameter(
            torch.empty(self._population_size, self._in_features, self._out_features),
            requires_grad=True,
        )
        self.bias = torch.nn.Parameter(
            torch.empty(self._population_size, 1, self._out_features),
            requires_grad=True,
        )

        # Initialize each member's weight matrix independently, mirroring
        # torch.nn.Linear's kaiming-uniform scheme.
        for member_id in range(population_size):
            torch.nn.init.kaiming_uniform_(self.weight[member_id], a=math.sqrt(5))
        fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight[0])
        bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
        torch.nn.init.uniform_(self.bias, -bound, bound)

        # BUG FIX: LayerNorm's second positional parameter is `eps`, not a
        # shape — the original passed population_size there, which silently
        # set a huge epsilon and broke the normalization. Normalize over the
        # trailing out_features dimension only.
        self._layer_norm = (
            torch.nn.LayerNorm(self._out_features) if use_layer_norm else None
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply each member's affine map to its slice of the batch."""
        assert x.shape[0] == self._population_size
        out = x.matmul(self.weight) + self.bias
        if self._layer_norm is not None:
            return self._layer_norm(out)
        return out
class VectorizedA2CBuilder(network_builder.A2CBuilder):
    """A2C network builder whose MLP trunk and output heads are vectorized
    over a population of policies."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        return

    class Network(network_builder.A2CBuilder.Network):
        def __init__(self, params, **kwargs):
            # population_size must exist before the parent constructor runs,
            # because the overridden _build_mlp below reads it.
            self.population_size = kwargs.get('population_size')
            super().__init__(params, **kwargs)
            # Swap the parent's scalar heads for population-vectorized ones.
            self.value = VectorizedLinearLayer(population_size=self.population_size,
                                               in_features=self.units[-1],
                                               out_features=self.value_size)
            actions_num = kwargs.get('actions_num')
            self.mu = VectorizedLinearLayer(self.population_size, self.units[-1], actions_num)
            if self.fixed_sigma:
                # One learned log-std vector per population member.
                self.sigma = nn.Parameter(
                    torch.zeros((self.population_size, 1, actions_num),
                                requires_grad=True, dtype=torch.float32),
                    requires_grad=True)
            else:
                self.sigma = VectorizedLinearLayer(self.population_size, self.units[-1], actions_num)

        def _build_vectorized_mlp(self, input_size, units, activation, norm_func_name=None):
            """Build a stack of VectorizedLinearLayer + activation pairs."""
            print(f'build vectorized mlp:{self.population_size}x{input_size}')
            modules = []
            width = input_size
            for hidden in units:
                modules.append(VectorizedLinearLayer(self.population_size, width, hidden,
                                                     norm_func_name == 'layer_norm'))
                modules.append(self.activations_factory.create(activation))
                width = hidden
            return nn.Sequential(*modules)

        def _build_mlp(self, input_size, units, activation, dense_func,
                       norm_only_first_layer=False, norm_func_name=None, d2rl=False):
            # Route the parent's MLP construction to the vectorized variant;
            # dense_func / d2rl options are intentionally ignored here.
            return self._build_vectorized_mlp(input_size, units, activation,
                                              norm_func_name=norm_func_name)

        def forward(self, obs_dict):
            # implement continues situation
            observations = obs_dict['obs']
            rnn_states = obs_dict.get('rnn_states', None)
            trunk_out = self.actor_mlp(observations)
            value = self.value_act(self.value(trunk_out))
            mu = self.mu_act(self.mu(trunk_out))
            if self.fixed_sigma:
                sigma = self.sigma_act(self.sigma)
            else:
                sigma = self.sigma_act(self.sigma(trunk_out))
            # `mu * 0 + sigma` broadcasts a fixed sigma to mu's shape.
            return mu, mu * 0 + sigma, value, rnn_states

        def load(self, params):
            super().load(params)

    def build(self, name, **kwargs):
        return VectorizedA2CBuilder.Network(self.params, **kwargs)
large to display: 19.5 MB] ================================================ FILE: timechamber/tasks/__init__.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from .ma_ant_sumo import MA_Ant_Sumo
from .ma_ant_battle import MA_Ant_Battle
from .ma_humanoid_strike import HumanoidStrike

# Mappings from strings to environments
isaacgym_task_map = {
    "MA_Ant_Sumo": MA_Ant_Sumo,
    "MA_Ant_Battle": MA_Ant_Battle,
    "MA_Humanoid_Strike": HumanoidStrike
}


# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/base_task.py
# ================================================
# Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import sys
import os
import operator
from copy import deepcopy
import random

from isaacgym import gymapi
from isaacgym.gymutil import get_property_setter_map, get_property_getter_map, get_default_setter_args, apply_random_samples, check_buckets, generate_random_samples

import numpy as np
import torch


# Base class for RL tasks
class BaseTask():
    def __init__(self, cfg, enable_camera_sensors=False):
        """Set up Isaac Gym, allocate per-env buffers, create the sim, and
        (unless headless) create a viewer with keyboard shortcuts.

        cfg: task config dict; reads cfg["env"] sizes, device, headless flags.
        enable_camera_sensors: keep a graphics device even when headless.
        """
        self.gym = gymapi.acquire_gym()

        self.device_type = cfg.get("device_type", "cuda")
        self.device_id = cfg.get("device_id", 0)

        self.device = "cpu"
        if self.device_type == "cuda" or self.device_type == "GPU":
            self.device = "cuda" + ":" + str(self.device_id)

        self.headless = cfg["headless"]
        self.num_agents = cfg["env"].get("numAgents", 1)  # used for multi-agent environments

        # double check!
        # Headless without camera sensors needs no graphics device at all.
        self.graphics_device_id = self.device_id
        if enable_camera_sensors == False and self.headless == True:
            self.graphics_device_id = -1

        self.num_envs = cfg["env"]["numEnvs"]
        self.num_obs = cfg["env"]["numObservations"]
        self.num_states = cfg["env"].get("numStates", 0)
        self.num_actions = cfg["env"]["numActions"]

        self.control_freq_inv = cfg["env"].get("controlFrequencyInv", 1)

        # optimization flags for pytorch JIT
        torch._C._jit_set_profiling_mode(False)
        torch._C._jit_set_profiling_executor(False)

        # allocate buffers
        self.obs_buf = torch.zeros(
            (self.num_envs, self.num_obs), device=self.device, dtype=torch.float)
        self.states_buf = torch.zeros(
            (self.num_envs, self.num_states), device=self.device, dtype=torch.float)
        self.rew_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.float)
        self.reset_buf = torch.ones(
            self.num_envs, device=self.device, dtype=torch.long)
        self.progress_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.randomize_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.extras = {}

        # Domain-randomization bookkeeping.
        self.original_props = {}
        self.dr_randomizations = {}
        self.first_randomization = True
        self.actor_params_generator = None
        self.extern_actor_params = {}
        for env_id in range(self.num_envs):
            self.extern_actor_params[env_id] = None
        self.last_step = -1
        self.last_rand_step = -1

        # create envs, sim and viewer
        # NOTE(review): create_sim() is invoked with no arguments here while
        # the base definition below takes four — subclasses are expected to
        # override it with a zero-argument version; confirm against subclasses.
        self.create_sim()
        self.gym.prepare_sim(self.sim)

        # todo: read from config
        self.enable_viewer_sync = True
        self.viewer = None

        # if running with a viewer, set up keyboard shortcuts and camera
        if self.headless == False:
            # subscribe to keyboard shortcuts
            self.viewer = self.gym.create_viewer(
                self.sim, gymapi.CameraProperties())
            self.gym.subscribe_viewer_keyboard_event(
                self.viewer, gymapi.KEY_ESCAPE, "QUIT")
            self.gym.subscribe_viewer_keyboard_event(
                self.viewer, gymapi.KEY_V, "toggle_viewer_sync")

            # set the camera position based on up axis
            sim_params = self.gym.get_sim_params(self.sim)
            if sim_params.up_axis == gymapi.UP_AXIS_Z:
                cam_pos = gymapi.Vec3(20.0, 25.0, 3.0)
                cam_target = gymapi.Vec3(10.0, 15.0, 0.0)
            else:
                cam_pos = gymapi.Vec3(20.0, 3.0, 25.0)
                cam_target = gymapi.Vec3(10.0, 0.0, 15.0)

            self.gym.viewer_camera_look_at(
                self.viewer, None, cam_pos, cam_target)

    # set gravity based on up axis and return axis index
    def set_sim_params_up_axis(self, sim_params, axis):
        """Configure gravity for the given up axis ('z' or default y) and
        return the up-axis index (2 for z, 1 otherwise)."""
        if axis == 'z':
            sim_params.up_axis = gymapi.UP_AXIS_Z
            sim_params.gravity.x = 0
            sim_params.gravity.y = 0
            sim_params.gravity.z = -9.81
            return 2
        return 1

    def create_sim(self, compute_device, graphics_device, physics_engine, sim_params):
        """Create and return the Isaac Gym sim; exits the process on failure."""
        sim = self.gym.create_sim(compute_device, graphics_device, physics_engine, sim_params)
        if sim is None:
            print("*** Failed to create sim")
            quit()
        return sim

    def step(self, actions):
        """One environment step: pre-physics, physics substeps, post-physics,
        applying any configured action/observation noise."""
        if self.dr_randomizations.get('actions', None):
            actions = self.dr_randomizations['actions']['noise_lambda'](actions)

        # apply actions
        self.pre_physics_step(actions)

        # step physics and render each frame
        self._physics_step()

        # to fix!
        if self.device == 'cpu':
            self.gym.fetch_results(self.sim, True)

        # compute observations, rewards, resets, ...
        self.post_physics_step()

        if self.dr_randomizations.get('observations', None):
            self.obs_buf = self.dr_randomizations['observations']['noise_lambda'](self.obs_buf)

    def get_states(self):
        """Return the privileged per-env state buffer."""
        return self.states_buf

    def render(self, sync_frame_time=False):
        """Service viewer events and draw a frame when a viewer exists.

        NOTE(review): sync_frame_time is currently unused in the body.
        """
        if self.viewer:
            # check for window closed
            if self.gym.query_viewer_has_closed(self.viewer):
                sys.exit()

            # check for keyboard events
            for evt in self.gym.query_viewer_action_events(self.viewer):
                if evt.action == "QUIT" and evt.value > 0:
                    sys.exit()
                elif evt.action == "toggle_viewer_sync" and evt.value > 0:
                    self.enable_viewer_sync = not self.enable_viewer_sync

            # fetch results
            if self.device != 'cpu':
                self.gym.fetch_results(self.sim, True)

            # step graphics
            if self.enable_viewer_sync:
                self.gym.step_graphics(self.sim)
                self.gym.draw_viewer(self.viewer, self.sim, True)
            else:
                self.gym.poll_viewer_events(self.viewer)

    def get_actor_params_info(self, dr_params, env):
        """Returns a flat array of actor params, their names and ranges."""
        if "actor_params" not in dr_params:
            return None
        params = []
        names = []
        lows = []
        highs = []
        param_getters_map = get_property_getter_map(self.gym)
        for actor, actor_properties in dr_params["actor_params"].items():
            handle = self.gym.find_actor_handle(env, actor)
            for prop_name, prop_attrs in actor_properties.items():
                if prop_name == 'color':
                    continue  # this is set randomly
                props = param_getters_map[prop_name](env, handle)
                if not isinstance(props, list):
                    props = [props]
                for prop_idx, prop in enumerate(props):
                    for attr, attr_randomization_params in prop_attrs.items():
                        name = prop_name+'_'+str(prop_idx)+'_'+attr
                        lo_hi = attr_randomization_params['range']
                        distr = attr_randomization_params['distribution']
                        # Non-uniform distributions have no finite range.
                        if 'uniform' not in distr:
                            lo_hi = (-1.0*float('Inf'), float('Inf'))
                        if isinstance(prop, np.ndarray):
                            for attr_idx in range(prop[attr].shape[0]):
                                params.append(prop[attr][attr_idx])
                                names.append(name+'_'+str(attr_idx))
                                lows.append(lo_hi[0])
                                highs.append(lo_hi[1])
                        else:
                            params.append(getattr(prop, attr))
                            names.append(name)
                            lows.append(lo_hi[0])
                            highs.append(lo_hi[1])
        return params, names, lows, highs
    # Apply randomizations only on resets, due to current PhysX limitations
    def apply_randomizations(self, dr_params):
        """Apply domain randomization described by `dr_params` to noise
        lambdas, sim params, and per-actor physical properties.

        On the first call everything is randomized; afterwards non-environment
        params follow `frequency` and envs are randomized when both their
        randomize counter exceeds the frequency and they are being reset.
        """
        # If we don't have a randomization frequency, randomize every step
        rand_freq = dr_params.get("frequency", 1)

        # First, determine what to randomize:
        #   - non-environment parameters when > frequency steps have passed since the last non-environment
        #   - physical environments in the reset buffer, which have exceeded the randomization frequency threshold
        #   - on the first call, randomize everything
        self.last_step = self.gym.get_frame_count(self.sim)
        if self.first_randomization:
            do_nonenv_randomize = True
            env_ids = list(range(self.num_envs))
        else:
            do_nonenv_randomize = (self.last_step - self.last_rand_step) >= rand_freq
            rand_envs = torch.where(self.randomize_buf >= rand_freq,
                                    torch.ones_like(self.randomize_buf),
                                    torch.zeros_like(self.randomize_buf))
            rand_envs = torch.logical_and(rand_envs, self.reset_buf)
            env_ids = torch.nonzero(rand_envs, as_tuple=False).squeeze(-1).tolist()
            self.randomize_buf[rand_envs] = 0

        if do_nonenv_randomize:
            self.last_rand_step = self.last_step

        param_setters_map = get_property_setter_map(self.gym)
        param_setter_defaults_map = get_default_setter_args(self.gym)
        param_getters_map = get_property_getter_map(self.gym)

        # On first iteration, check the number of buckets
        if self.first_randomization:
            check_buckets(self.gym, self.envs, dr_params)

        # Build (or rebuild) the observation/action noise lambdas.
        for nonphysical_param in ["observations", "actions"]:
            if nonphysical_param in dr_params and do_nonenv_randomize:
                dist = dr_params[nonphysical_param]["distribution"]
                op_type = dr_params[nonphysical_param]["operation"]
                sched_type = dr_params[nonphysical_param]["schedule"] if "schedule" in dr_params[nonphysical_param] else None
                sched_step = dr_params[nonphysical_param]["schedule_steps"] if "schedule" in dr_params[nonphysical_param] else None
                op = operator.add if op_type == 'additive' else operator.mul

                # Scale the noise magnitude in over time per the schedule.
                if sched_type == 'linear':
                    sched_scaling = 1.0 / sched_step * \
                        min(self.last_step, sched_step)
                elif sched_type == 'constant':
                    sched_scaling = 0 if self.last_step < sched_step else 1
                else:
                    sched_scaling = 1

                if dist == 'gaussian':
                    mu, var = dr_params[nonphysical_param]["range"]
                    mu_corr, var_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])
                    if op_type == 'additive':
                        mu *= sched_scaling
                        var *= sched_scaling
                        mu_corr *= sched_scaling
                        var_corr *= sched_scaling
                    elif op_type == 'scaling':
                        var = var * sched_scaling  # scale up var over time
                        mu = mu * sched_scaling + 1.0 * \
                            (1.0 - sched_scaling)  # linearly interpolate
                        var_corr = var_corr * sched_scaling  # scale up var over time
                        mu_corr = mu_corr * sched_scaling + 1.0 * \
                            (1.0 - sched_scaling)  # linearly interpolate

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        # 'corr' is drawn once and reused so this noise term is
                        # correlated across calls until randomization recurs.
                        params = self.dr_randomizations[param_name]
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * params['var_corr'] + params['mu_corr']
                        return op(
                            tensor, corr + torch.randn_like(tensor) * params['var'] + params['mu'])

                    self.dr_randomizations[nonphysical_param] = {'mu': mu, 'var': var, 'mu_corr': mu_corr,
                                                                 'var_corr': var_corr, 'noise_lambda': noise_lambda}

                elif dist == 'uniform':
                    lo, hi = dr_params[nonphysical_param]["range"]
                    lo_corr, hi_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])
                    if op_type == 'additive':
                        lo *= sched_scaling
                        hi *= sched_scaling
                        lo_corr *= sched_scaling
                        hi_corr *= sched_scaling
                    elif op_type == 'scaling':
                        lo = lo * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi = hi * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        lo_corr = lo_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi_corr = hi_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        # NOTE(review): the correlated term is drawn with randn_like
                        # even for the 'uniform' distribution — confirm intent.
                        params = self.dr_randomizations[param_name]
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * (params['hi_corr'] - params['lo_corr']) + params['lo_corr']
                        return op(tensor, corr + torch.rand_like(tensor) * (params['hi'] - params['lo']) + params['lo'])

                    self.dr_randomizations[nonphysical_param] = {'lo': lo, 'hi': hi, 'lo_corr': lo_corr,
                                                                 'hi_corr': hi_corr, 'noise_lambda': noise_lambda}

        if "sim_params" in dr_params and do_nonenv_randomize:
            prop_attrs = dr_params["sim_params"]
            prop = self.gym.get_sim_params(self.sim)
            if self.first_randomization:
                self.original_props["sim_params"] = {
                    attr: getattr(prop, attr) for attr in dir(prop)}
            for attr, attr_randomization_params in prop_attrs.items():
                apply_random_samples(
                    prop, self.original_props["sim_params"], attr, attr_randomization_params, self.last_step)
            self.gym.set_sim_params(self.sim, prop)

        # If self.actor_params_generator is initialized: use it to
        # sample actor simulation params. This gives users the
        # freedom to generate samples from arbitrary distributions,
        # e.g. use full-covariance distributions instead of the DR's
        # default of treating each simulation parameter independently.
        extern_offsets = {}
        if self.actor_params_generator is not None:
            for env_id in env_ids:
                self.extern_actor_params[env_id] = \
                    self.actor_params_generator.sample()
                extern_offsets[env_id] = 0

        # Randomize per-actor physical properties for the selected envs.
        for actor, actor_properties in dr_params["actor_params"].items():
            for env_id in env_ids:
                env = self.envs[env_id]
                handle = self.gym.find_actor_handle(env, actor)
                extern_sample = self.extern_actor_params[env_id]

                for prop_name, prop_attrs in actor_properties.items():
                    if prop_name == 'color':
                        # Colors are purely visual: draw uniform RGB per body.
                        num_bodies = self.gym.get_actor_rigid_body_count(
                            env, handle)
                        for n in range(num_bodies):
                            self.gym.set_rigid_body_color(env, handle, n, gymapi.MESH_VISUAL,
                                                          gymapi.Vec3(random.uniform(0, 1), random.uniform(0, 1),
                                                                      random.uniform(0, 1)))
                        continue
                    if prop_name == 'scale':
                        attr_randomization_params = prop_attrs
                        sample = generate_random_samples(attr_randomization_params, 1,
                                                         self.last_step, None)
                        og_scale = 1
                        if attr_randomization_params['operation'] == 'scaling':
                            new_scale = og_scale * sample
                        elif attr_randomization_params['operation'] == 'additive':
                            new_scale = og_scale + sample
                        self.gym.set_actor_scale(env, handle, new_scale)
                        continue

                    prop = param_getters_map[prop_name](env, handle)
                    if isinstance(prop, list):
                        # Randomize relative to the properties captured on the
                        # first call, so noise does not compound over resets.
                        if self.first_randomization:
                            self.original_props[prop_name] = [
                                {attr: getattr(p, attr) for attr in dir(p)} for p in prop]
                        for p, og_p in zip(prop, self.original_props[prop_name]):
                            for attr, attr_randomization_params in prop_attrs.items():
                                smpl = None
                                if self.actor_params_generator is not None:
                                    smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                        extern_sample, extern_offsets[env_id], p, attr)
                                apply_random_samples(
                                    p, og_p, attr, attr_randomization_params,
                                    self.last_step, smpl)
                    else:
                        if self.first_randomization:
                            self.original_props[prop_name] = deepcopy(prop)
                        for attr, attr_randomization_params in prop_attrs.items():
                            smpl = None
                            if self.actor_params_generator is not None:
                                smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                    extern_sample, extern_offsets[env_id], prop, attr)
                            apply_random_samples(
                                prop, self.original_props[prop_name], attr,
                                attr_randomization_params, self.last_step, smpl)

                    setter = param_setters_map[prop_name]
                    default_args = param_setter_defaults_map[prop_name]
                    setter(env, handle, prop, *default_args)

        if self.actor_params_generator is not None:
            for env_id in env_ids:  # check that we used all dims in sample
                if extern_offsets[env_id] > 0:
                    extern_sample = self.extern_actor_params[env_id]
                    if extern_offsets[env_id] != extern_sample.shape[0]:
                        print('env_id', env_id,
                              'extern_offset', extern_offsets[env_id],
                              'vs extern_sample.shape', extern_sample.shape)
                        raise Exception("Invalid extern_sample size")

        self.first_randomization = False

    def pre_physics_step(self, actions):
        # Subclasses apply actions here before the physics substeps.
        raise NotImplementedError

    def _physics_step(self):
        """Run control_freq_inv physics substeps, rendering between them."""
        for i in range(self.control_freq_inv):
            self.render()
            self.gym.simulate(self.sim)
        return

    def post_physics_step(self):
        # Subclasses compute observations, rewards and resets here.
        raise NotImplementedError


def get_attr_val_from_sample(sample, offset, prop, attr):
    """Retrieves param value for the given prop and attr from the sample."""
    if sample is None:
        return None, 0
    if isinstance(prop, np.ndarray):
        # Array-valued attribute: consume one sample entry per element.
        smpl = sample[offset:offset+prop[attr].shape[0]]
        return smpl, offset+prop[attr].shape[0]
    else:
        return sample[offset], offset+1
Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np import os import torch from isaacgym import gymtorch from isaacgym import gymapi from isaacgym.torch_utils import * from timechamber.utils import torch_utils from timechamber.utils.utils import print_actor_info, print_asset_info from timechamber.tasks.ase_humanoid_base.base_task import BaseTask class Humanoid(BaseTask): def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): self.cfg = cfg self.sim_params = sim_params self.physics_engine = physics_engine ## self.borderline_space = self.cfg["env"]["borderlineSpace"] self.num_agents = self.cfg["env"].get("numAgents", 1) self._pd_control = self.cfg["env"]["pdControl"] self.power_scale = self.cfg["env"]["powerScale"] self.debug_viz = self.cfg["env"]["enableDebugVis"] self.plane_static_friction = self.cfg["env"]["plane"]["staticFriction"] self.plane_dynamic_friction = self.cfg["env"]["plane"]["dynamicFriction"] self.plane_restitution = self.cfg["env"]["plane"]["restitution"] self.max_episode_length = 
self.cfg["env"]["episodeLength"] self._local_root_obs = self.cfg["env"]["localRootObs"] self._root_height_obs = self.cfg["env"].get("rootHeightObs", True) self._enable_early_termination = self.cfg["env"]["enableEarlyTermination"] key_bodies = self.cfg["env"]["keyBodies"] self._setup_character_props(key_bodies) self.cfg["env"]["numObservations"] = self.get_obs_size() self.cfg["env"]["numActions"] = self.get_action_size() self.cfg["device_type"] = device_type self.cfg["device_id"] = device_id self.cfg["headless"] = headless super().__init__(cfg=self.cfg) self.dt = self.control_freq_inv * sim_params.dt # get gym GPU state tensors actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim) dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) # print(f"dof_state_tensor shape: {dof_state_tensor.shape}") sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim) rigid_body_state = self.gym.acquire_rigid_body_state_tensor(self.sim) contact_force_tensor = self.gym.acquire_net_contact_force_tensor(self.sim) sensors_per_env = 2 self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs * self.num_agents, sensors_per_env * 6) dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim) self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs * self.num_agents, self.num_dof) self.gym.refresh_dof_state_tensor(self.sim) self.gym.refresh_actor_root_state_tensor(self.sim) self.gym.refresh_rigid_body_state_tensor(self.sim) self.gym.refresh_net_contact_force_tensor(self.sim) self._root_states = gymtorch.wrap_tensor(actor_root_state) # print(f'root_states:{self._root_states.shape}') num_actors = self.get_num_actors_per_env() # print(f"num actors: {num_actors}") self._humanoid_root_states = self._root_states # print(f"humanoid_root_states shape: {self._humanoid_root_states.shape}") # (num_envs*2, 13) self._initial_humanoid_root_states = self._humanoid_root_states.clone() self._initial_humanoid_root_states[:, 7:13] 
= 0 # zero for linear vel and angular vel self._humanoid_actor_ids = num_actors * torch.arange(self.num_envs, device=self.device, dtype=torch.int32) # print(f"humanoid_actor_ids: {self._humanoid_actor_ids}") # 0, 2, 4, 6... # print(f"humanoid indices: {self.humanoid_indices}") # 0, 2, 4, 6... # print(f"humanooid op indices: {self.humanoid_indices_op}") # 1, 3, 5, 7... # create some wrapper tensors for different slices self._dof_state = gymtorch.wrap_tensor(dof_state_tensor) dofs_per_env = self._dof_state.shape[0] // self.num_envs self._dof_pos = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., :self.num_dof, 0] self._dof_vel = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., :self.num_dof, 1] # op self._dof_pos_op = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., self.num_dof:, 0] self._dof_vel_op = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., self.num_dof:, 1] self._initial_dof_pos = torch.zeros_like(self._dof_pos, device=self.device, dtype=torch.float) self._initial_dof_vel = torch.zeros_like(self._dof_vel, device=self.device, dtype=torch.float) # op self._initial_dof_pos_op = torch.zeros_like(self._dof_pos, device=self.device, dtype=torch.float) self._initial_dof_vel_op = torch.zeros_like(self._dof_vel, device=self.device, dtype=torch.float) self._rigid_body_state = gymtorch.wrap_tensor(rigid_body_state) bodies_per_env = self._rigid_body_state.shape[0] // self.num_envs rigid_body_state_reshaped = self._rigid_body_state.view(self.num_envs, bodies_per_env, 13) self._rigid_body_pos = rigid_body_state_reshaped[..., :self.num_bodies, 0:3] self._rigid_body_rot = rigid_body_state_reshaped[..., :self.num_bodies, 3:7] self._rigid_body_vel = rigid_body_state_reshaped[..., :self.num_bodies, 7:10] self._rigid_body_ang_vel = rigid_body_state_reshaped[..., :self.num_bodies, 10:13] # op self._rigid_body_pos_op = rigid_body_state_reshaped[..., self.num_bodies:, 0:3] self._rigid_body_rot_op = rigid_body_state_reshaped[..., 
self.num_bodies:, 3:7]
        # --- tail of __init__: remaining sim-tensor views and buffers ---
        # Opponent humanoid's per-body velocities (second half of each env's
        # rigid-body slice; the ego character occupies the first num_bodies rows).
        self._rigid_body_vel_op = rigid_body_state_reshaped[..., self.num_bodies:, 7:10]
        self._rigid_body_ang_vel_op = rigid_body_state_reshaped[..., self.num_bodies:, 10:13]

        # Net contact forces (xyz per body), split into ego / opponent halves.
        contact_force_tensor = gymtorch.wrap_tensor(contact_force_tensor)
        self._contact_forces = contact_force_tensor.view(self.num_envs, bodies_per_env, 3)[..., :self.num_bodies, :]
        self._contact_forces_op = contact_force_tensor.view(self.num_envs, bodies_per_env, 3)[..., self.num_bodies:, :]

        # Per-env early-termination flag; cleared in _reset_env_tensors().
        self._terminate_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long)

        self._build_termination_heights()

        contact_bodies = self.cfg["env"]["contactBodies"]
        self._key_body_ids = self._build_key_body_ids_tensor(key_bodies)
        self._contact_body_ids = self._build_contact_body_ids_tensor(contact_bodies)

        self.allocate_buffers()

        return

    def get_obs_size(self):
        """Return the per-agent observation width (set by _setup_character_props)."""
        return self._num_obs

    def get_action_size(self):
        """Return the per-agent action width (set by _setup_character_props)."""
        return self._num_actions

    def get_num_actors_per_env(self):
        """Return how many sim actors each env holds (two humanoids here)."""
        num_actors = self._root_states.shape[0] // self.num_envs
        return num_actors

    def _add_circle_borderline(self, env):
        """Draw a red circular arena border in the viewer for `env`.

        Builds 20 stacked rings of 360 one-degree segments; unit-circle points
        are scaled by self.borderline_space (the arena radius).
        """
        lines = []
        borderline_height = 0.01
        for height in range(20):
            for angle in range(360):
                begin_point = [np.cos(np.radians(angle)), np.sin(np.radians(angle)), borderline_height * height]
                end_point = [np.cos(np.radians(angle + 1)), np.sin(np.radians(angle + 1)), borderline_height * height]
                lines.append(begin_point)
                lines.append(end_point)
        lines = np.array(lines, dtype=np.float32) * self.borderline_space
        colors = np.array([[1, 0, 0]] * int(len(lines) / 2), dtype=np.float32)
        self.gym.add_lines(self.viewer, env, int(len(lines) / 2), lines, colors)

    def _add_rectangle_borderline(self, env):
        """Draw a red square arena border in the viewer for `env`.

        Four height bands (k) of ten lines each; every side of the square is
        split into 90 segments, with half-extent self.borderline_space.
        """
        lines = []
        # NOTE: this `colors` is overwritten below before use.
        colors = np.zeros((90*60, 3), dtype=np.float32)
        for k in range(4):
            for height in range(10):
                lines1 = []
                lines2 = []
                lines3 = []
                lines4 = []
                for i in range(90):
                    # Four sides of the square at z = height*0.01 + k*0.25.
                    begin_point1 = [-self.borderline_space + i * self.borderline_space / 45, self.borderline_space, height*0.01+ k*0.25]
                    end_point1 = [-self.borderline_space + (i+1) * self.borderline_space / 45, self.borderline_space, height*0.01+ k*0.25]
                    begin_point2 = [self.borderline_space, self.borderline_space - i * self.borderline_space / 45, height*0.01+ k*0.25]
                    end_point2 = [self.borderline_space, self.borderline_space - (i+1) * self.borderline_space / 45, height*0.01+ k*0.25]
                    begin_point3 = [self.borderline_space - i * self.borderline_space / 45, -self.borderline_space, height*0.01+ k*0.25]
                    end_point3 = [self.borderline_space - (i+1) * self.borderline_space / 45, -self.borderline_space, height*0.01+ k*0.25]
                    begin_point4 = [-self.borderline_space , -self.borderline_space + i * self.borderline_space / 45, height*0.01+ k*0.25]
                    end_point4 = [-self.borderline_space, -self.borderline_space + (i+1) * self.borderline_space / 45, height*0.01+ k*0.25]
                    lines1.append(begin_point1)
                    lines1.append(end_point1)
                    lines2.append(begin_point2)
                    lines2.append(end_point2)
                    lines3.append(begin_point3)
                    lines3.append(end_point3)
                    lines4.append(begin_point4)
                    lines4.append(end_point4)
                lines.extend(lines1)
                lines.extend(lines2)
                lines.extend(lines3)
                lines.extend(lines4)
        lines = np.array(lines, dtype=np.float32)
        colors = np.array([[1, 0, 0]] * int(len(lines) / 2), dtype=np.float32)
        self.gym.add_lines(self.viewer, env, int(len(lines) / 2), lines, colors)

    def allocate_buffers(self):
        """Allocate the per-step RL buffers (obs/state/reward/reset/progress/extras)."""
        # Observations are stacked agent-major: rows [0, num_envs) are the ego
        # agent, rows [num_envs, 2*num_envs) the opponent (see _compute_observations).
        self.obs_buf = torch.zeros((self.num_agents * self.num_envs, self.num_obs), device=self.device, dtype=torch.float)
        self.states_buf = torch.zeros(
            (self.num_envs, self.num_states), device=self.device, dtype=torch.float)
        self.rew_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.float)
        self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long)
        self.timeout_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.progress_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.randomize_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        # Match-outcome flags, one slot per non-ego agent per env.
        self.extras = {
            'win': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool),
            'lose': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool),
            'draw': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool)}
        # World-axis unit vectors, repeated for both agents of every env.
        self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1))
        self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1))
        self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1))

    def create_sim(self):
        """Create the simulator, ground plane, and all envs."""
        self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, 'z')
        self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)

        self._create_ground_plane()
        self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs)))
        return

    def reset(self, env_ids=None):
        """Reset the given envs (all envs when env_ids is None)."""
        if (env_ids is None):
            env_ids = to_torch(np.arange(self.num_envs), device=self.device, dtype=torch.long)
        self._reset_envs(env_ids)
        return

    def set_char_color(self, col, env_ids):
        """Tint every rigid body of the ego humanoid in `env_ids` with RGB `col`."""
        for env_id in env_ids:
            env_ptr = self.envs[env_id]
            handle = self.humanoid_handles[env_id]

            for j in range(self.num_bodies):
                self.gym.set_rigid_body_color(env_ptr, handle, j, gymapi.MESH_VISUAL,
                                              gymapi.Vec3(col[0], col[1], col[2]))
        return

    def _reset_envs(self, env_ids):
        """Reset actors and sim tensors for `env_ids`, then refresh and re-observe."""
        if (len(env_ids) > 0):
            self._reset_actors(env_ids)
            self._reset_env_tensors(env_ids)
            self._refresh_sim_tensors()
            self._compute_observations()
        return

    def _reset_env_tensors(self, env_ids):
        """Push the already-reset root/DOF state of both humanoids into the sim."""
        # env_ids_int32 = self._humanoid_actor_ids[env_ids]
        # Both the ego and the opponent actor index of each env are reset together.
        env_ids_int32 = (torch.cat((self.humanoid_indices[env_ids], self.humanoid_indices_op[env_ids]))).to(dtype=torch.int32)
        self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states),
                                                     gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
        self.gym.set_dof_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self._dof_state), gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) self.progress_buf[env_ids] = 0 self.reset_buf[env_ids] = 0 self._terminate_buf[env_ids] = 0 return def _create_ground_plane(self): plane_params = gymapi.PlaneParams() plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0) plane_params.static_friction = self.plane_static_friction plane_params.dynamic_friction = self.plane_dynamic_friction plane_params.restitution = self.plane_restitution self.gym.add_ground(self.sim, plane_params) return def _setup_character_props(self, key_bodies): asset_file = self.cfg["env"]["asset"]["assetFileName"] num_key_bodies = len(key_bodies) if (asset_file == "mjcf/amp_humanoid.xml"): self._dof_body_ids = [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14] self._dof_offsets = [0, 3, 6, 9, 10, 13, 14, 17, 18, 21, 24, 25, 28] self._dof_obs_size = 72 self._num_actions = 28 self._num_obs = 1 + 15 * (3 + 6 + 3 + 3) - 3 elif (asset_file == "mjcf/amp_humanoid_sword_shield.xml"): self._dof_body_ids = [1, 2, 3, 4, 5, 7, 8, 11, 12, 13, 14, 15, 16] self._dof_offsets = [0, 3, 6, 9, 10, 13, 16, 17, 20, 21, 24, 27, 28, 31] self._dof_obs_size = 78 self._num_actions = 31 self._num_obs = 1 + 17 * (3 + 6 + 3 + 3) - 3 else: print("Unsupported character config file: {s}".format(asset_file)) assert(False) return def _build_termination_heights(self): head_term_height = 0.3 shield_term_height = 0.32 termination_height = self.cfg["env"]["terminationHeight"] self._termination_heights = np.array([termination_height] * self.num_bodies) head_id = self.gym.find_actor_rigid_body_handle(self.envs[0], self.humanoid_handles[0], "head") self._termination_heights[head_id] = max(head_term_height, self._termination_heights[head_id]) asset_file = self.cfg["env"]["asset"]["assetFileName"] if (asset_file == "mjcf/amp_humanoid_sword_shield.xml"): left_arm_id = self.gym.find_actor_rigid_body_handle(self.envs[0], self.humanoid_handles[0], "left_lower_arm") self._termination_heights[left_arm_id] = 
max(shield_term_height, self._termination_heights[left_arm_id]) self._termination_heights = to_torch(self._termination_heights, device=self.device) return def _create_envs(self, num_envs, spacing, num_per_row): lower = gymapi.Vec3(-spacing, -spacing, 0.0) upper = gymapi.Vec3(spacing, spacing, spacing) asset_root = self.cfg["env"]["asset"]["assetRoot"] asset_file = self.cfg["env"]["asset"]["assetFileName"] asset_path = os.path.join(asset_root, asset_file) asset_root = os.path.dirname(asset_path) asset_file = os.path.basename(asset_path) asset_options = gymapi.AssetOptions() asset_options.angular_damping = 0.01 asset_options.max_angular_velocity = 100.0 asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE #asset_options.fix_base_link = True humanoid_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) humanoid_asset_op = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) actuator_props = self.gym.get_asset_actuator_properties(humanoid_asset) motor_efforts = [prop.motor_effort for prop in actuator_props] # create force sensors at the feet right_foot_idx = self.gym.find_asset_rigid_body_index(humanoid_asset, "right_foot") left_foot_idx = self.gym.find_asset_rigid_body_index(humanoid_asset, "left_foot") # op right_foot_idx_op = self.gym.find_asset_rigid_body_index(humanoid_asset_op, "right_foot") left_foot_idx_op = self.gym.find_asset_rigid_body_index(humanoid_asset_op, "left_foot") sensor_pose = gymapi.Transform() sensor_pose_op = gymapi.Transform() self.gym.create_asset_force_sensor(humanoid_asset, right_foot_idx, sensor_pose) self.gym.create_asset_force_sensor(humanoid_asset, left_foot_idx, sensor_pose) # op self.gym.create_asset_force_sensor(humanoid_asset_op, right_foot_idx_op, sensor_pose_op) self.gym.create_asset_force_sensor(humanoid_asset_op, left_foot_idx_op, sensor_pose_op) self.max_motor_effort = max(motor_efforts) self.motor_efforts = to_torch(motor_efforts, device=self.device) self.torso_index = 0 # 17 
bodies self.num_bodies = self.gym.get_asset_rigid_body_count(humanoid_asset) # 31 dofs self.num_dof = self.gym.get_asset_dof_count(humanoid_asset) # 34 joints self.num_joints = self.gym.get_asset_joint_count(humanoid_asset) self.humanoid_handles = [] self.humanoid_handles_op = [] self.humanoid_indices = [] self.humanoid_indices_op = [] self.envs = [] self.dof_limits_lower = [] self.dof_limits_upper = [] for i in range(self.num_envs): # create env instance env_ptr = self.gym.create_env(self.sim, lower, upper, num_per_row) self._build_env(i, env_ptr, humanoid_asset, humanoid_asset_op) self.envs.append(env_ptr) dof_prop = self.gym.get_actor_dof_properties(self.envs[0], self.humanoid_handles[0]) for j in range(self.num_dof): if dof_prop['lower'][j] > dof_prop['upper'][j]: self.dof_limits_lower.append(dof_prop['upper'][j]) self.dof_limits_upper.append(dof_prop['lower'][j]) else: self.dof_limits_lower.append(dof_prop['lower'][j]) self.dof_limits_upper.append(dof_prop['upper'][j]) self.dof_limits_lower = to_torch(self.dof_limits_lower, device=self.device) self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device) self.humanoid_indices = to_torch(self.humanoid_indices, dtype=torch.long, device=self.device) self.humanoid_indices_op = to_torch(self.humanoid_indices_op, dtype=torch.long, device=self.device) if (self._pd_control): self._build_pd_action_offset_scale() return def _build_env(self, env_id, env_ptr, humanoid_asset, humanoid_asset_op): col_group = env_id col_filter = self._get_humanoid_collision_filter() segmentation_id = 0 start_pose = gymapi.Transform() start_pose_op = gymapi.Transform() # asset_file = self.cfg["env"]["asset"]["assetFileName"] # char_h = 0.89 start_pose.p = gymapi.Vec3(-self.borderline_space + 2, -self.borderline_space + 2, 0.89) start_pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1.0) start_pose_op.p = gymapi.Vec3(self.borderline_space - 2, self.borderline_space - 2, 0.89) # start_pose_op.p = gymapi.Vec3(0, 0, 0.89) start_pose_op.r = 
gymapi.Quat(0.0, 0.0, 0.0, 1.0) humanoid_handle = self.gym.create_actor(env_ptr, humanoid_asset, start_pose, "humanoid", col_group, col_filter, segmentation_id) humanoid_index = self.gym.get_actor_index(env_ptr, humanoid_handle, gymapi.DOMAIN_SIM) humanoid_handle_op = self.gym.create_actor(env_ptr, humanoid_asset_op, start_pose_op, "humanoid", col_group, col_filter, segmentation_id) humanoid_index_op = self.gym.get_actor_index(env_ptr, humanoid_handle_op, gymapi.DOMAIN_SIM) self.gym.enable_actor_dof_force_sensors(env_ptr, humanoid_handle) self.gym.enable_actor_dof_force_sensors(env_ptr, humanoid_handle_op) for j in range(self.num_bodies): self.gym.set_rigid_body_color(env_ptr, humanoid_handle, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.54, 0.85, 0.2)) self.gym.set_rigid_body_color(env_ptr, humanoid_handle_op, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.97, 0.38, 0.06)) if (self._pd_control): dof_prop = self.gym.get_asset_dof_properties(humanoid_asset) dof_prop["driveMode"] = gymapi.DOF_MODE_POS self.gym.set_actor_dof_properties(env_ptr, humanoid_handle, dof_prop) dof_prop_op = self.gym.get_asset_dof_properties(humanoid_asset_op) dof_prop_op["driveMode"] = gymapi.DOF_MODE_POS self.gym.set_actor_dof_properties(env_ptr, humanoid_handle_op, dof_prop_op) self.humanoid_handles.append(humanoid_handle) self.humanoid_indices.append(humanoid_index) self.humanoid_handles_op.append(humanoid_handle_op) self.humanoid_indices_op.append(humanoid_index_op) return def _build_pd_action_offset_scale(self): num_joints = len(self._dof_offsets) - 1 lim_low = self.dof_limits_lower.cpu().numpy() lim_high = self.dof_limits_upper.cpu().numpy() for j in range(num_joints): dof_offset = self._dof_offsets[j] dof_size = self._dof_offsets[j + 1] - self._dof_offsets[j] if (dof_size == 3): curr_low = lim_low[dof_offset:(dof_offset + dof_size)] curr_high = lim_high[dof_offset:(dof_offset + dof_size)] curr_low = np.max(np.abs(curr_low)) curr_high = np.max(np.abs(curr_high)) curr_scale = max([curr_low, curr_high]) 
# --- continuation of _build_pd_action_offset_scale ---
                # Widen by 20% but never beyond pi.
                curr_scale = 1.2 * curr_scale
                curr_scale = min([curr_scale, np.pi])

                lim_low[dof_offset:(dof_offset + dof_size)] = -curr_scale
                lim_high[dof_offset:(dof_offset + dof_size)] = curr_scale

                #lim_low[dof_offset:(dof_offset + dof_size)] = -np.pi
                #lim_high[dof_offset:(dof_offset + dof_size)] = np.pi

            elif (dof_size == 1):
                curr_low = lim_low[dof_offset]
                curr_high = lim_high[dof_offset]
                curr_mid = 0.5 * (curr_high + curr_low)

                # extend the action range to be a bit beyond the joint limits so that the motors
                # don't lose their strength as they approach the joint limits
                curr_scale = 0.7 * (curr_high - curr_low)
                curr_low = curr_mid - curr_scale
                curr_high = curr_mid + curr_scale

                lim_low[dof_offset] = curr_low
                lim_high[dof_offset] = curr_high

        self._pd_action_offset = 0.5 * (lim_high + lim_low)
        self._pd_action_scale = 0.5 * (lim_high - lim_low)
        self._pd_action_offset = to_torch(self._pd_action_offset, device=self.device)
        self._pd_action_scale = to_torch(self._pd_action_scale, device=self.device)

        return

    def _get_humanoid_collision_filter(self):
        """Collision filter bitmask for humanoid actors (0 = collide with everything)."""
        return 0

    def _compute_reward(self, actions):
        """Fill rew_buf; this base task hands out a constant alive reward."""
        self.rew_buf[:] = compute_humanoid_reward(self.obs_buf)
        return

    def _compute_reset(self):
        """Fill reset/terminate buffers from fall detection and episode timeout."""
        self.reset_buf[:], self._terminate_buf[:] = compute_humanoid_reset(self.reset_buf, self.progress_buf,
                                                                           self._contact_forces, self._contact_body_ids,
                                                                           self._rigid_body_pos, self.max_episode_length,
                                                                           self._enable_early_termination,
                                                                           self._termination_heights)
        return

    def _refresh_sim_tensors(self):
        """Pull the latest simulator state into the wrapped GPU tensors."""
        self.gym.refresh_dof_state_tensor(self.sim)
        self.gym.refresh_actor_root_state_tensor(self.sim)
        self.gym.refresh_rigid_body_state_tensor(self.sim)

        self.gym.refresh_force_sensor_tensor(self.sim)
        self.gym.refresh_dof_force_tensor(self.sim)
        self.gym.refresh_net_contact_force_tensor(self.sim)
        return

    def _compute_observations(self):
        """Write ego obs into rows [0, num_envs) and opponent obs into [num_envs, 2*num_envs)."""
        obs, obs_op = self._compute_humanoid_obs()
        self.obs_buf[:self.num_envs] = obs
        self.obs_buf[self.num_envs:] = obs_op
        return

    def _compute_humanoid_obs(self):
        """Build the max-coordinate observation for both characters."""
        body_pos = self._rigid_body_pos
        body_rot = self._rigid_body_rot
        body_vel = self._rigid_body_vel
        body_ang_vel = self._rigid_body_ang_vel
        body_pos_op = self._rigid_body_pos_op
        body_rot_op = self._rigid_body_rot_op
        body_vel_op = self._rigid_body_vel_op
        body_ang_vel_op = self._rigid_body_ang_vel_op
        obs = compute_humanoid_observations_max(body_pos, body_rot, body_vel, body_ang_vel,
                                                self._local_root_obs, self._root_height_obs)
        obs_op = compute_humanoid_observations_max(body_pos_op, body_rot_op, body_vel_op, body_ang_vel_op,
                                                   self._local_root_obs, self._root_height_obs)
        return obs, obs_op

    def _reset_actors(self, env_ids):
        """Restore the initial root and DOF state for both agents of the given envs."""
        # Root states are indexed per-actor (2 actors per env), DOF views per-env.
        agent_env_ids = expand_env_ids(env_ids, 2)
        self._humanoid_root_states[agent_env_ids] = self._initial_humanoid_root_states[agent_env_ids]
        self._dof_pos[env_ids] = self._initial_dof_pos[env_ids]
        self._dof_vel[env_ids] = self._initial_dof_vel[env_ids]
        self._dof_pos_op[env_ids] = self._initial_dof_pos_op[env_ids]
        self._dof_vel_op[env_ids] = self._initial_dof_vel_op[env_ids]
        return

    def pre_physics_step(self, actions):
        """Apply the stacked actions (ego rows first, then opponent) as PD targets or torques."""
        self.actions = actions.to(self.device).clone()
        ego_actions = self.actions[:self.num_envs]
        op_actions = self.actions[self.num_envs:]
        if (self._pd_control):
            pd_tar_ego = self._action_to_pd_targets(ego_actions)
            pd_tar_op = self._action_to_pd_targets(op_actions)
            # Concatenate along the DOF axis: each env's DOF slice holds both actors.
            pd_tar = torch.cat([pd_tar_ego, pd_tar_op], dim=-1)
            pd_tar_tensor = gymtorch.unwrap_tensor(pd_tar)
            self.gym.set_dof_position_target_tensor(self.sim, pd_tar_tensor)
        else:
            forces = self.actions * self.motor_efforts.unsqueeze(0) * self.power_scale
            force_tensor = gymtorch.unwrap_tensor(forces)
            self.gym.set_dof_actuation_force_tensor(self.sim, force_tensor)
        return

    def post_physics_step(self):
        """Advance bookkeeping after simulation: refresh tensors, obs, reward, resets."""
        self.progress_buf += 1

        self._refresh_sim_tensors()
        self._compute_observations()
        self._compute_reward(self.actions)
        self._compute_reset()

        self.extras["terminate"] = self._terminate_buf

        # debug viz
        if self.viewer and self.debug_viz:
            self._update_debug_viz()

        return

    def render(self, sync_frame_time=False):
        super().render(sync_frame_time)
        return

    def _build_key_body_ids_tensor(self, key_body_names):
        """Map key-body names to rigid-body handles on the ego actor."""
        env_ptr = self.envs[0]
        actor_handle = self.humanoid_handles[0]
        body_ids = []

        for body_name in key_body_names:
            body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name)
            assert(body_id != -1)
            body_ids.append(body_id)

        body_ids = to_torch(body_ids, device=self.device, dtype=torch.long)
        return body_ids

    def _build_contact_body_ids_tensor(self, contact_body_names):
        """Map allowed-contact body names to rigid-body handles on the ego actor."""
        env_ptr = self.envs[0]
        actor_handle = self.humanoid_handles[0]
        body_ids = []

        for body_name in contact_body_names:
            body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name)
            assert(body_id != -1)
            body_ids.append(body_id)

        body_ids = to_torch(body_ids, device=self.device, dtype=torch.long)
        return body_ids

    def _action_to_pd_targets(self, action):
        """Map normalized actions to PD position targets via the precomputed offset/scale."""
        pd_tar = self._pd_action_offset + self._pd_action_scale * action
        return pd_tar

    def _update_debug_viz(self):
        self.gym.clear_lines(self.viewer)
        return


#####################################################################
###=========================jit functions=========================###
#####################################################################

@torch.jit.script
def dof_to_obs(pose, dof_obs_size, dof_offsets):
    # type: (Tensor, int, List[int]) -> Tensor
    # Encode DOF positions as one 6D tangent/normal rotation per joint.
    joint_obs_size = 6
    num_joints = len(dof_offsets) - 1

    dof_obs_shape = pose.shape[:-1] + (dof_obs_size,)
    dof_obs = torch.zeros(dof_obs_shape, device=pose.device)

    dof_obs_offset = 0  # NOTE: unused
    for j in range(num_joints):
        dof_offset = dof_offsets[j]
        dof_size = dof_offsets[j + 1] - dof_offsets[j]
        joint_pose = pose[:, dof_offset:(dof_offset + dof_size)]

        # assume this is a spherical joint
        if (dof_size == 3):
            joint_pose_q = torch_utils.exp_map_to_quat(joint_pose)
        elif (dof_size == 1):
            # 1-DOF joints rotate about the local y axis in these MJCF models.
            axis = torch.tensor([0.0, 1.0, 0.0], dtype=joint_pose.dtype, device=pose.device)
            joint_pose_q = quat_from_angle_axis(joint_pose[..., 0], axis)
        else:
            joint_pose_q = None
            assert(False), "Unsupported joint type"

        joint_dof_obs = torch_utils.quat_to_tan_norm(joint_pose_q)
        dof_obs[:, (j * joint_obs_size):((j + 1) * joint_obs_size)] = joint_dof_obs

    assert((num_joints * joint_obs_size) == dof_obs_size)

    return dof_obs


@torch.jit.script
def compute_humanoid_observations(root_pos, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos,
                                  local_root_obs, root_height_obs, dof_obs_size, dof_offsets):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, bool, bool, int, List[int]) -> Tensor
    # Reduced-coordinate observation: root height/rot/vels + DOF encoding + key-body positions,
    # all expressed in the root's heading frame.
    root_h = root_pos[:, 2:3]
    heading_rot = torch_utils.calc_heading_quat_inv(root_rot)

    if (local_root_obs):
        root_rot_obs = quat_mul(heading_rot, root_rot)
    else:
        root_rot_obs = root_rot
    root_rot_obs = torch_utils.quat_to_tan_norm(root_rot_obs)

    if (not root_height_obs):
        root_h_obs = torch.zeros_like(root_h)
    else:
        root_h_obs = root_h

    local_root_vel = quat_rotate(heading_rot, root_vel)
    local_root_ang_vel = quat_rotate(heading_rot, root_ang_vel)

    root_pos_expand = root_pos.unsqueeze(-2)
    local_key_body_pos = key_body_pos - root_pos_expand

    heading_rot_expand = heading_rot.unsqueeze(-2)
    heading_rot_expand = heading_rot_expand.repeat((1, local_key_body_pos.shape[1], 1))
    flat_end_pos = local_key_body_pos.view(local_key_body_pos.shape[0] * local_key_body_pos.shape[1],
                                           local_key_body_pos.shape[2])
    flat_heading_rot = heading_rot_expand.view(heading_rot_expand.shape[0] * heading_rot_expand.shape[1],
                                               heading_rot_expand.shape[2])
    local_end_pos = quat_rotate(flat_heading_rot, flat_end_pos)
    flat_local_key_pos = local_end_pos.view(local_key_body_pos.shape[0],
                                            local_key_body_pos.shape[1] * local_key_body_pos.shape[2])

    dof_obs = dof_to_obs(dof_pos, dof_obs_size, dof_offsets)

    obs = torch.cat((root_h_obs, root_rot_obs, local_root_vel, local_root_ang_vel, dof_obs, dof_vel,
                     flat_local_key_pos), dim=-1)
    return obs


@torch.jit.script
def compute_humanoid_observations_max(body_pos, body_rot, body_vel, body_ang_vel, local_root_obs, root_height_obs):
    # type: (Tensor, Tensor, Tensor, Tensor, bool, bool) -> Tensor
    # Max-coordinate observation: every body's pos/rot/vel/ang-vel expressed in
    # the root's heading frame, plus the root height.
    root_pos = body_pos[:, 0, :]  # 0: pelvis, root
    root_rot = body_rot[:, 0, :]

    root_h = root_pos[:, 2:3]  # 1. Height of the root from the ground
    heading_rot = torch_utils.calc_heading_quat_inv(root_rot)

    if (not root_height_obs):
        root_h_obs = torch.zeros_like(root_h)
    else:
        root_h_obs = root_h

    heading_rot_expand = heading_rot.unsqueeze(-2)  # num_envs, 1, 4
    # num_envs, body_pos.shape[1], 4
    heading_rot_expand = heading_rot_expand.repeat((1, body_pos.shape[1], 1))
    flat_heading_rot = heading_rot_expand.reshape(heading_rot_expand.shape[0] * heading_rot_expand.shape[1],
                                                  heading_rot_expand.shape[2])

    root_pos_expand = root_pos.unsqueeze(-2)
    local_body_pos = body_pos - root_pos_expand
    flat_local_body_pos = local_body_pos.reshape(local_body_pos.shape[0] * local_body_pos.shape[1],
                                                 local_body_pos.shape[2])
    flat_local_body_pos = quat_rotate(flat_heading_rot, flat_local_body_pos)
    local_body_pos = flat_local_body_pos.reshape(local_body_pos.shape[0],
                                                 local_body_pos.shape[1] * local_body_pos.shape[2])
    local_body_pos = local_body_pos[..., 3:]  # remove root pos

    flat_body_rot = body_rot.reshape(body_rot.shape[0] * body_rot.shape[1], body_rot.shape[2])
    flat_local_body_rot = quat_mul(flat_heading_rot, flat_body_rot)
    flat_local_body_rot_obs = torch_utils.quat_to_tan_norm(flat_local_body_rot)
    local_body_rot_obs = flat_local_body_rot_obs.reshape(body_rot.shape[0],
                                                         body_rot.shape[1] * flat_local_body_rot_obs.shape[1])

    if (local_root_obs):
        # Replace the root's rotation encoding with the un-headinged root rotation.
        root_rot_obs = torch_utils.quat_to_tan_norm(root_rot)
        local_body_rot_obs[..., 0:6] = root_rot_obs

    flat_body_vel = body_vel.reshape(body_vel.shape[0] * body_vel.shape[1], body_vel.shape[2])
    flat_local_body_vel = quat_rotate(flat_heading_rot, flat_body_vel)
    local_body_vel = flat_local_body_vel.reshape(body_vel.shape[0], body_vel.shape[1] * body_vel.shape[2])

    flat_body_ang_vel = body_ang_vel.reshape(body_ang_vel.shape[0] * body_ang_vel.shape[1], body_ang_vel.shape[2])
    flat_local_body_ang_vel = quat_rotate(flat_heading_rot, flat_body_ang_vel)
    local_body_ang_vel = flat_local_body_ang_vel.reshape(body_ang_vel.shape[0],
                                                         body_ang_vel.shape[1] * body_ang_vel.shape[2])

    obs = torch.cat((root_h_obs, local_body_pos, local_body_rot_obs, local_body_vel, local_body_ang_vel), dim=-1)
    return obs


@torch.jit.script
def expand_env_ids(env_ids, n_agents):
    # type: (Tensor, int) -> Tensor
    # Expand per-env ids into per-agent actor ids, interleaved so that
    # env e's agents occupy ids [e*n_agents, e*n_agents + n_agents).
    device = env_ids.device
    agent_env_ids = torch.zeros((n_agents * len(env_ids)), device=device, dtype=torch.long)
    for idx in range(n_agents):
        agent_env_ids[idx::n_agents] = env_ids * n_agents + idx
    return agent_env_ids


@torch.jit.script
def compute_humanoid_reward(obs_buf):
    # type: (Tensor) -> Tensor
    # Constant alive reward (1 per agent-env row).
    reward = torch.ones_like(obs_buf[:, 0])
    return reward


@torch.jit.script
def compute_humanoid_reset(reset_buf, progress_buf, contact_buf, contact_body_ids, rigid_body_pos,
                           max_episode_length, enable_early_termination, termination_heights):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, float, bool, Tensor) -> Tuple[Tensor, Tensor]
    # A fall requires BOTH a contact on a disallowed body AND some disallowed
    # body dropping below its termination height.
    terminated = torch.zeros_like(reset_buf)

    if (enable_early_termination):
        masked_contact_buf = contact_buf.clone()
        masked_contact_buf[:, contact_body_ids, :] = 0
        fall_contact = torch.any(torch.abs(masked_contact_buf) > 0.1, dim=-1)
        fall_contact = torch.any(fall_contact, dim=-1)

        body_height = rigid_body_pos[..., 2]
        fall_height = body_height < termination_heights
        fall_height[:, contact_body_ids] = False
        fall_height = torch.any(fall_height, dim=-1)

        has_fallen = torch.logical_and(fall_contact, fall_height)

        # first timestep can sometimes still have nonzero contact forces
        # so only check after first couple of steps
        has_fallen *= (progress_buf > 1)
        terminated = torch.where(has_fallen, torch.ones_like(reset_buf), terminated)

    reset = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(reset_buf), terminated)

    return reset, terminated


================================================ FILE: timechamber/tasks/ase_humanoid_base/humanoid_amp.py ================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
#
All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class HumanoidAMP(Humanoid):
    """Humanoid environment with AMP (Adversarial Motion Prior) observations.

    Extends Humanoid with a reference-motion library and a rolling buffer of
    the last `numAMPObsSteps` AMP observation frames per env; the flattened
    buffer is exposed to the discriminator via `extras["amp_obs"]`.
    """

    class StateInit(Enum):
        # Default: the base class's initial pose.
        # Start/Random: a reference-motion frame (t=0 / random t).
        # Hybrid: per-env Bernoulli mix of reference and default.
        Default = 0
        Start = 1
        Random = 2
        Hybrid = 3

    def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
        state_init = cfg["env"]["stateInit"]
        self._state_init = HumanoidAMP.StateInit[state_init]
        self._hybrid_init_prob = cfg["env"]["hybridInitProb"]
        self._num_amp_obs_steps = cfg["env"]["numAMPObsSteps"]
        assert(self._num_amp_obs_steps >= 2)

        self._reset_default_env_ids = []
        self._reset_ref_env_ids = []

        super().__init__(cfg=cfg,
                         sim_params=sim_params,
                         physics_engine=physics_engine,
                         device_type=device_type,
                         device_id=device_id,
                         headless=headless)

        motion_file = cfg['env']['motion_file']
        self._load_motion(motion_file)

        # Rolling AMP-observation window: slot 0 is the current frame, slots 1..
        # are history. The two views below alias this buffer, so writing them
        # writes the buffer in place.
        self._amp_obs_buf = torch.zeros((self.num_envs, self._num_amp_obs_steps, self._num_amp_obs_per_step),
                                        device=self.device, dtype=torch.float)
        self._curr_amp_obs_buf = self._amp_obs_buf[:, 0]
        self._hist_amp_obs_buf = self._amp_obs_buf[:, 1:]

        # Lazily sized on the first fetch_amp_obs_demo() call.
        self._amp_obs_demo_buf = None

        return

    def post_physics_step(self):
        """Advance the AMP window (shift history, fill current) and publish it."""
        super().post_physics_step()

        self._update_hist_amp_obs()
        self._compute_amp_observations()

        amp_obs_flat = self._amp_obs_buf.view(-1, self.get_num_amp_obs())
        self.extras["amp_obs"] = amp_obs_flat

        return

    def get_num_amp_obs(self):
        """Size of one flattened AMP observation (steps * features per step)."""
        return self._num_amp_obs_steps * self._num_amp_obs_per_step

    def fetch_amp_obs_demo(self, num_samples):
        """Sample `num_samples` AMP observation windows from reference motions.

        Returns a (num_samples, get_num_amp_obs()) tensor of expert demos for
        the discriminator. The demo buffer is sized on first use and its size
        must not change afterwards.
        """
        if (self._amp_obs_demo_buf is None):
            self._build_amp_obs_demo_buf(num_samples)
        else:
            assert(self._amp_obs_demo_buf.shape[0] == num_samples)

        motion_ids = self._motion_lib.sample_motions(num_samples)
        motion_times0 = self._motion_lib.sample_time(motion_ids)
        amp_obs_demo = self.build_amp_obs_demo(motion_ids, motion_times0)
        self._amp_obs_demo_buf[:] = amp_obs_demo.view(self._amp_obs_demo_buf.shape)
        amp_obs_demo_flat = self._amp_obs_demo_buf.view(-1, self.get_num_amp_obs())

        return amp_obs_demo_flat

    def build_amp_obs_demo(self, motion_ids, motion_times0):
        """Build AMP windows ending at `motion_times0` by stepping backwards in dt."""
        dt = self.dt

        motion_ids = torch.tile(motion_ids.unsqueeze(-1), [1, self._num_amp_obs_steps])
        motion_times = motion_times0.unsqueeze(-1)
        # Offsets 0, -dt, -2dt, ...: most recent frame first, matching _amp_obs_buf.
        time_steps = -dt * torch.arange(0, self._num_amp_obs_steps, device=self.device)
        motion_times = motion_times + time_steps

        motion_ids = motion_ids.view(-1)
        motion_times = motion_times.view(-1)
        root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \
            = self._motion_lib.get_motion_state(motion_ids, motion_times)
        amp_obs_demo = build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel,
                                              dof_pos, dof_vel, key_pos,
                                              self._local_root_obs, self._root_height_obs,
                                              self._dof_obs_size, self._dof_offsets)
        return amp_obs_demo

    def _build_amp_obs_demo_buf(self, num_samples):
        self._amp_obs_demo_buf = torch.zeros((num_samples, self._num_amp_obs_steps, self._num_amp_obs_per_step),
                                             device=self.device, dtype=torch.float32)
        return

    def _setup_character_props(self, key_bodies):
        """Set per-step AMP feature count for the known character assets."""
        super()._setup_character_props(key_bodies)

        asset_file = self.cfg["env"]["asset"]["assetFileName"]
        num_key_bodies = len(key_bodies)

        if (asset_file == "mjcf/amp_humanoid.xml"):
            self._num_amp_obs_per_step = 13 + self._dof_obs_size + 28 + 3 * num_key_bodies  # [root_h, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos]
        elif (asset_file == "mjcf/amp_humanoid_sword_shield.xml"):
            self._num_amp_obs_per_step = 13 + self._dof_obs_size + 31 + 3 * num_key_bodies  # [root_h, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos]
        else:
            # BUGFIX: "{s}" is an invalid format field and raised KeyError before
            # the assert could fire; use "{:s}" like the other messages in this file.
            print("Unsupported character config file: {:s}".format(asset_file))
            assert(False)

        return

    def _load_motion(self, motion_file):
        assert(self._dof_offsets[-1] == self.num_dof)
        self._motion_lib = MotionLib(motion_file=motion_file,
                                     dof_body_ids=self._dof_body_ids,
                                     dof_offsets=self._dof_offsets,
                                     key_body_ids=self._key_body_ids.cpu().numpy(),
                                     device=self.device)
        return

    def _reset_envs(self, env_ids):
        self._reset_default_env_ids = []
        self._reset_ref_env_ids = []

        super()._reset_envs(env_ids)
        self._init_amp_obs(env_ids)

        return

    def _reset_actors(self, env_ids):
        """Dispatch actor reset according to the configured StateInit mode."""
        if (self._state_init == HumanoidAMP.StateInit.Default):
            self._reset_default(env_ids)
        elif (self._state_init == HumanoidAMP.StateInit.Start
              or self._state_init == HumanoidAMP.StateInit.Random):
            self._reset_ref_state_init(env_ids)
        elif (self._state_init == HumanoidAMP.StateInit.Hybrid):
            self._reset_hybrid_state_init(env_ids)
        else:
            assert(False), "Unsupported state initialization strategy: {:s}".format(str(self._state_init))
        return

    def _reset_default(self, env_ids):
        super()._reset_actors(env_ids)
        # NOTE(review): the original bookkeeping below is deliberately disabled,
        # so default-reset envs never take the _init_amp_obs_default path —
        # confirm this is intended before re-enabling.
        # self._humanoid_root_states[env_ids] = self._initial_humanoid_root_states[env_ids]
        # self._dof_pos[env_ids] = self._initial_dof_pos[env_ids]
        # self._dof_vel[env_ids] = self._initial_dof_vel[env_ids]
        # self._reset_default_env_ids = env_ids
        return

    def _reset_ref_state_init(self, env_ids):
        """Reset `env_ids` to states sampled from the reference motion library."""
        num_envs = env_ids.shape[0]
        motion_ids = self._motion_lib.sample_motions(num_envs)

        if (self._state_init == HumanoidAMP.StateInit.Random
                or self._state_init == HumanoidAMP.StateInit.Hybrid):
            motion_times = self._motion_lib.sample_time(motion_ids)
        elif (self._state_init == HumanoidAMP.StateInit.Start):
            motion_times = torch.zeros(num_envs, device=self.device)
        else:
            assert(False), "Unsupported state initialization strategy: {:s}".format(str(self._state_init))

        root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \
            = self._motion_lib.get_motion_state(motion_ids, motion_times)

        self._set_env_state(env_ids=env_ids,
                            root_pos=root_pos,
                            root_rot=root_rot,
                            dof_pos=dof_pos,
                            root_vel=root_vel,
                            root_ang_vel=root_ang_vel,
                            dof_vel=dof_vel)

        self._reset_ref_env_ids = env_ids
        self._reset_ref_motion_ids = motion_ids
        self._reset_ref_motion_times = motion_times
        return

    def _reset_hybrid_state_init(self, env_ids):
        """Per env, choose reference init with prob `hybridInitProb`, else default."""
        num_envs = env_ids.shape[0]
        ref_probs = to_torch(np.array([self._hybrid_init_prob] * num_envs), device=self.device)
        ref_init_mask = torch.bernoulli(ref_probs) == 1.0

        ref_reset_ids = env_ids[ref_init_mask]
        if (len(ref_reset_ids) > 0):
            self._reset_ref_state_init(ref_reset_ids)

        default_reset_ids = env_ids[torch.logical_not(ref_init_mask)]
        if (len(default_reset_ids) > 0):
            self._reset_default(default_reset_ids)

        return

    def _init_amp_obs(self, env_ids):
        """Fill the AMP history of freshly reset envs so the window is valid."""
        self._compute_amp_observations(env_ids)

        if (len(self._reset_default_env_ids) > 0):
            self._init_amp_obs_default(self._reset_default_env_ids)

        if (len(self._reset_ref_env_ids) > 0):
            self._init_amp_obs_ref(self._reset_ref_env_ids, self._reset_ref_motion_ids,
                                   self._reset_ref_motion_times)

        return

    def _init_amp_obs_default(self, env_ids):
        # No real history after a default reset: replicate the current frame.
        curr_amp_obs = self._curr_amp_obs_buf[env_ids].unsqueeze(-2)
        self._hist_amp_obs_buf[env_ids] = curr_amp_obs
        return

    def _init_amp_obs_ref(self, env_ids, motion_ids, motion_times):
        """Back-fill AMP history from the reference motion the env was reset to."""
        dt = self.dt
        # BUGFIX: without unsqueeze(-1), tile produced a (1, n*k) sequence-repeat
        # ordering that misaligned motion ids with the env-major flattened
        # motion_times below; unsqueeze matches build_amp_obs_demo.
        motion_ids = torch.tile(motion_ids.unsqueeze(-1), [1, self._num_amp_obs_steps - 1])
        motion_times = motion_times.unsqueeze(-1)
        # History offsets -dt, -2dt, ... (slot 0 / offset 0 is the live frame).
        time_steps = -dt * (torch.arange(0, self._num_amp_obs_steps - 1, device=self.device) + 1)
        motion_times = motion_times + time_steps

        motion_ids = motion_ids.view(-1)
        motion_times = motion_times.view(-1)
        root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \
            = self._motion_lib.get_motion_state(motion_ids, motion_times)
        amp_obs_demo = build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel,
                                              dof_pos, dof_vel, key_pos,
                                              self._local_root_obs, self._root_height_obs,
                                              self._dof_obs_size, self._dof_offsets)
        self._hist_amp_obs_buf[env_ids] = amp_obs_demo.view(self._hist_amp_obs_buf[env_ids].shape)
        return

    def _set_env_state(self, env_ids, root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel):
        # Root state layout: [0:3] pos, [3:7] rot quat, [7:10] lin vel, [10:13] ang vel.
        self._humanoid_root_states[env_ids, 0:3] = root_pos
        self._humanoid_root_states[env_ids, 3:7] = root_rot
        self._humanoid_root_states[env_ids, 7:10] = root_vel
        self._humanoid_root_states[env_ids, 10:13] = root_ang_vel

        self._dof_pos[env_ids] = dof_pos
        self._dof_vel[env_ids] = dof_vel
        return

    def _update_hist_amp_obs(self, env_ids=None):
        # Shift the window: history slots 1.. receive the previous slots 0..n-2.
        if (env_ids is None):
            self._hist_amp_obs_buf[:] = self._amp_obs_buf[:, 0:(self._num_amp_obs_steps - 1)]
        else:
            self._hist_amp_obs_buf[env_ids] = self._amp_obs_buf[env_ids, 0:(self._num_amp_obs_steps - 1)]
        return

    def _compute_amp_observations(self, env_ids=None):
        """Write the current-frame AMP features (slot 0) from simulator state."""
        key_body_pos = self._rigid_body_pos[:, self._key_body_ids, :]
        if (env_ids is None):
            self._curr_amp_obs_buf[:] = build_amp_observations(self._rigid_body_pos[:, 0, :],
                                                               self._rigid_body_rot[:, 0, :],
                                                               self._rigid_body_vel[:, 0, :],
                                                               self._rigid_body_ang_vel[:, 0, :],
                                                               self._dof_pos, self._dof_vel, key_body_pos,
                                                               self._local_root_obs, self._root_height_obs,
                                                               self._dof_obs_size, self._dof_offsets)
        else:
            self._curr_amp_obs_buf[env_ids] = build_amp_observations(self._rigid_body_pos[env_ids][:, 0, :],
                                                                     self._rigid_body_rot[env_ids][:, 0, :],
                                                                     self._rigid_body_vel[env_ids][:, 0, :],
                                                                     self._rigid_body_ang_vel[env_ids][:, 0, :],
                                                                     self._dof_pos[env_ids],
                                                                     self._dof_vel[env_ids],
                                                                     key_body_pos[env_ids],
                                                                     self._local_root_obs,
                                                                     self._root_height_obs,
                                                                     self._dof_obs_size, self._dof_offsets)
        return


#####################################################################
###=========================jit functions=========================###
#####################################################################

@torch.jit.script
def build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos,
                           local_root_obs, root_height_obs, dof_obs_size, dof_offsets):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, bool, bool, int, List[int]) -> Tensor
    """Assemble one AMP feature frame, expressed in the root's heading frame."""
    root_h = root_pos[:, 2:3]
    heading_rot = torch_utils.calc_heading_quat_inv(root_rot)

    if (local_root_obs):
        root_rot_obs = quat_mul(heading_rot, root_rot)
    else:
        root_rot_obs = root_rot
    # 6D tangent-normal rotation encoding avoids quaternion double-cover.
    root_rot_obs = torch_utils.quat_to_tan_norm(root_rot_obs)

    if (not root_height_obs):
        root_h_obs = torch.zeros_like(root_h)
    else:
        root_h_obs = root_h

    local_root_vel = quat_rotate(heading_rot, root_vel)
    local_root_ang_vel = quat_rotate(heading_rot, root_ang_vel)

    # Key-body positions relative to the root, rotated into the heading frame.
    root_pos_expand = root_pos.unsqueeze(-2)
    local_key_body_pos = key_body_pos - root_pos_expand

    heading_rot_expand = heading_rot.unsqueeze(-2)
    heading_rot_expand = heading_rot_expand.repeat((1, local_key_body_pos.shape[1], 1))
    flat_end_pos = local_key_body_pos.view(local_key_body_pos.shape[0] * local_key_body_pos.shape[1],
                                           local_key_body_pos.shape[2])
    flat_heading_rot = heading_rot_expand.view(heading_rot_expand.shape[0] * heading_rot_expand.shape[1],
                                               heading_rot_expand.shape[2])
    local_end_pos = quat_rotate(flat_heading_rot, flat_end_pos)
    flat_local_key_pos = local_end_pos.view(local_key_body_pos.shape[0],
                                            local_key_body_pos.shape[1] * local_key_body_pos.shape[2])

    dof_obs = dof_to_obs(dof_pos, dof_obs_size, dof_offsets)

    obs = torch.cat((root_h_obs, root_rot_obs, local_root_vel, local_root_ang_vel, dof_obs, dof_vel,
                     flat_local_key_pos), dim=-1)
    return obs
class HumanoidAMPTask(humanoid_amp.HumanoidAMP):
    """AMP humanoid with a pluggable high-level task.

    Subclasses supply the task by overriding the `_*_task` hooks and
    `_compute_task_obs` / `_compute_reward`; this base wires those hooks into
    the env lifecycle and optionally appends task features to the observation.
    """

    def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
        self._enable_task_obs = cfg["env"]["enableTaskObs"]

        super().__init__(cfg=cfg,
                         sim_params=sim_params,
                         physics_engine=physics_engine,
                         device_type=device_type,
                         device_id=device_id,
                         headless=headless)

    def get_obs_size(self):
        """Humanoid observation size, plus task features when enabled."""
        size = super().get_obs_size()
        if self._enable_task_obs:
            size += self.get_task_obs_size()
        return size

    def get_task_obs_size(self):
        """Number of task-specific observation features (0 unless overridden)."""
        return 0

    def pre_physics_step(self, actions):
        super().pre_physics_step(actions)
        self._update_task()

    def render(self, sync_frame_time=False):
        super().render(sync_frame_time)
        if self.viewer:
            self._draw_task()

    def _update_task(self):
        """Hook: advance task state each step. No-op by default."""

    def _reset_envs(self, env_ids):
        super()._reset_envs(env_ids)
        self._reset_task(env_ids)

    def _reset_task(self, env_ids):
        """Hook: re-randomize task state for reset envs. No-op by default."""

    def _compute_observations(self):
        """Fill obs_buf: agent obs (plus task obs if enabled), then opponent obs."""
        own_obs, opponent_obs = self._compute_humanoid_obs()

        if self._enable_task_obs:
            task_obs = self._compute_task_obs(env_ids=None)
            # Task features are appended only to the controlled agent's obs.
            own_obs = torch.cat([own_obs, task_obs], dim=-1)

        self.obs_buf[:self.num_envs] = own_obs
        self.obs_buf[self.num_envs:] = opponent_obs

    def _compute_task_obs(self, env_ids=None):
        """Hook: build the task observation. Must be overridden by tasks."""
        return NotImplemented

    def _compute_reward(self, actions):
        """Hook: compute the task reward. Must be overridden by tasks."""
        return NotImplemented

    def _draw_task(self):
        """Hook: viewer-only task debug drawing. No-op by default."""
- `SkeletonMotion` describes a time-series of skeleton states and provides utilities for computing joint velocities. ## poselib.visualization - `poselib.visualization.common`: Functions used for visualizing skeletons interactively in `matplotlib`. - In SkeletonState visualization, use key `q` to quit window. - In interactive SkeletonMotion visualization, you can use the following key commands: - `w` - loop animation - `x` - play/pause animation - `z` - previous frame - `c` - next frame - `n` - quit window ## Key Features Poselib provides several key features for working with animation data. We list some of the frequently used ones here, and provide instructions and examples on their usage. ### Importing from FBX Poselib supports importing skeletal animation sequences from .fbx format into a SkeletonMotion representation. To use this functionality, you will need to first set up the Python FBX SDK on your machine using the following instructions. This package is necessary to read data from fbx files, which is a proprietary file format owned by Autodesk. The latest FBX SDK tested was FBX SDK 2020.2.1 for Python 3.7, which can be found on the Autodesk website: https://www.autodesk.com/developer-network/platform-technologies/fbx-sdk-2020-2-1. Follow the instructions at https://help.autodesk.com/view/FBX/2020/ENU/?guid=FBX_Developer_Help_scripting_with_python_fbx_installing_python_fbx_html for download, install, and copy/paste instructions for the FBX Python SDK. This repo provides an example script `fbx_importer.py` that shows usage of importing a .fbx file. Note that `SkeletonMotion.from_fbx()` takes in an optional parameter `root_joint`, which can be used to specify a joint in the skeleton tree as the root joint. If `root_joint` is not specified, we will default to using the first node in the FBX scene that contains animation data. ### Importing from MJCF MJCF is a robotics file format supported by Isaac Gym. 
For convenience, we provide an API for importing MJCF assets into SkeletonTree definitions to represent the skeleton topology. An example script `mjcf_importer.py` is provided to show usage of this. This can be helpful if motion sequences need to be retargeted to your simulation skeleton that's been created in MJCF format. Importing the file to SkeletonTree format will allow you to generate T-poses or other retargeting poses that can be used for retargeting. We also show an example of creating a T-Pose for our AMP Humanoid asset in `generate_amp_humanoid_tpose.py`. ### Retargeting Motions Retargeting motions is important when your source data uses skeletons that have different morphologies than your target skeletons. We provide APIs for performing retarget of motion sequences in our SkeletonState and SkeletonMotion classes. To use the retargeting API, users must provide the following information: - source_motion: a SkeletonMotion npy representation of a motion sequence. The motion clip should use the same skeleton as the source T-Pose skeleton. - target_motion_path: path to save the retargeted motion to - source_tpose: a SkeletonState npy representation of the source skeleton in it's T-Pose state - target_tpose: a SkeletonState npy representation of the target skeleton in it's T-Pose state (pose should match source T-Pose) - joint_mapping: mapping of joint names from source to target - rotation: root rotation offset from source to target skeleton (for transforming across different orientation axes), represented as a quaternion in XYZW order. - scale: scale offset from source to target skeleton We provide an example script `retarget_motion.py` to demonstrate usage of the retargeting API for the CMU Motion Capture Database. Note that the retargeting data for this script is stored in `data/configs/retarget_cmu_to_amp.json`. Additionally, a SkeletonState T-Pose file and retargeting config file are also provided for the SFU Motion Capture Database. 
These can be found at `data/sfu_tpose.npy` and `data/configs/retarget_sfu_to_amp.json`. ### Documentation We provide a description of the functions and classes available in poselib in the comments of the APIs. Please check them out for more details. ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/data/configs/retarget_cmu_to_amp.json ================================================ { "source_motion": "data/01_01_cmu.npy", "target_motion_path": "data/01_01_cmu_amp.npy", "source_tpose": "data/cmu_tpose.npy", "target_tpose": "data/amp_humanoid_tpose.npy", "joint_mapping": { "Hips": "pelvis", "LeftUpLeg": "left_thigh", "LeftLeg": "left_shin", "LeftFoot": "left_foot", "RightUpLeg": "right_thigh", "RightLeg": "right_shin", "RightFoot": "right_foot", "Spine1": "torso", "Head": "head", "LeftArm": "left_upper_arm", "LeftForeArm": "left_lower_arm", "LeftHand": "left_hand", "RightArm": "right_upper_arm", "RightForeArm": "right_lower_arm", "RightHand": "right_hand" }, "rotation": [0, 0, 0.7071068, 0.7071068], "scale": 0.056444, "root_height_offset": 0.05, "trim_frame_beg": 75, "trim_frame_end": 372 } ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/data/configs/retarget_sfu_to_amp.json ================================================ { "source_motion": "data/0005_Jogging001.npy", "target_motion_path": "data/0005_Jogging001_amp.npy", "source_tpose": "data/sfu_tpose.npy", "target_tpose": "data/amp_humanoid_tpose.npy", "joint_mapping": { "Hips": "pelvis", "LeftUpLeg": "left_thigh", "LeftLeg": "left_shin", "LeftFoot": "left_foot", "RightUpLeg": "right_thigh", "RightLeg": "right_shin", "RightFoot": "right_foot", "Spine1": "torso", "Head": "head", "LeftArm": "left_upper_arm", "LeftForeArm": "left_lower_arm", "LeftHand": "left_hand", "RightArm": "right_upper_arm", "RightForeArm": "right_lower_arm", "RightHand": "right_hand" }, "rotation": [0.5, 0.5, 0.5, 0.5], "scale": 0.01, 
"root_height_offset": 0.0, "trim_frame_beg": 0, "trim_frame_end": 100 } ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/fbx_importer.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Example script: import a CMU mocap clip from FBX into a SkeletonMotion.

Requires the Autodesk Python FBX SDK (see the poselib README for setup).
Saves the imported motion as .npy, then opens an interactive viewer.
"""
import os
import json

from poselib.skeleton.skeleton3d import SkeletonTree, SkeletonState, SkeletonMotion
from poselib.visualization.common import plot_skeleton_state, plot_skeleton_motion_interactive

# source fbx file path
fbx_file = "data/01_01_cmu.fbx"

# import fbx file - make sure to provide a valid joint name for root_joint
motion = SkeletonMotion.from_fbx(
    fbx_file_path=fbx_file,
    root_joint="Hips",
    fps=60
)

# save motion in npy format
motion.to_file("data/01_01_cmu.npy")

# visualize motion (interactive matplotlib window; blocks until closed)
plot_skeleton_motion_interactive(motion)
import torch

from poselib.core.rotation3d import *
from poselib.skeleton.skeleton3d import SkeletonTree, SkeletonState
from poselib.visualization.common import plot_skeleton_state

"""
This scripts imports a MJCF XML file and converts the skeleton into a SkeletonTree format.
It then generates a zero rotation pose, and adjusts the pose into a T-Pose.
"""

# import MJCF file
xml_path = "../../../../assets/mjcf/amp_humanoid.xml"
skeleton = SkeletonTree.from_mjcf(xml_path)

# generate zero rotation pose
zero_pose = SkeletonState.zero_pose(skeleton)

# adjust pose into a T Pose
# NOTE(review): the indexed assignments below appear to edit zero_pose in
# place via the tensor returned by `local_rotation` — this assumes the
# property returns the underlying tensor, not a copy; confirm in SkeletonState.
local_rotation = zero_pose.local_rotation
# rotate the left arm 90 degrees about the x axis to raise it into the T pose
local_rotation[skeleton.index("left_upper_arm")] = quat_mul(
    quat_from_angle_axis(angle=torch.tensor([90.0]), axis=torch.tensor([1.0, 0.0, 0.0]), degree=True),
    local_rotation[skeleton.index("left_upper_arm")]
)
# mirror rotation (-90 degrees) for the right arm
local_rotation[skeleton.index("right_upper_arm")] = quat_mul(
    quat_from_angle_axis(angle=torch.tensor([-90.0]), axis=torch.tensor([1.0, 0.0, 0.0]), degree=True),
    local_rotation[skeleton.index("right_upper_arm")]
)

translation = zero_pose.root_translation
# in-place += mutates the pose's translation tensor, lifting the root off the ground
translation += torch.tensor([0, 0, 0.9])

# save and visualize T-pose
zero_pose.to_file("data/amp_humanoid_tpose.npy")
plot_skeleton_state(zero_pose)
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Example script: import an MJCF skeleton and save its zero-rotation pose."""
from poselib.skeleton.skeleton3d import SkeletonTree, SkeletonState
from poselib.visualization.common import plot_skeleton_state

# load in XML mjcf file and save zero rotation pose in npy format
xml_path = "../../../../assets/mjcf/nv_humanoid.xml"
skeleton = SkeletonTree.from_mjcf(xml_path)
zero_pose = SkeletonState.zero_pose(skeleton)
zero_pose.to_file("data/nv_humanoid.npy")

# visualize zero rotation pose (blocks until the matplotlib window is closed)
plot_skeleton_state(zero_pose)
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. __version__ = "0.0.1" from .core import * ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/__init__.py ================================================ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. from .tensor_utils import * from .rotation3d import * from .backend import Serializable, logger ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/backend/__init__.py ================================================ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. 
from .abstract import Serializable from .logger import logger ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/backend/abstract.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from abc import ABCMeta, abstractmethod, abstractclassmethod from collections import OrderedDict import json import numpy as np import os TENSOR_CLASS = {} def register(name): global TENSOR_CLASS def core(tensor_cls): TENSOR_CLASS[name] = tensor_cls return tensor_cls return core def _get_cls(name): global TENSOR_CLASS return TENSOR_CLASS[name] class NumpyEncoder(json.JSONEncoder): """ Special json encoder for numpy types """ def default(self, obj): if isinstance( obj, ( np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, ), ): return int(obj) elif isinstance(obj, (np.float_, np.float16, np.float32, np.float64)): return float(obj) elif isinstance(obj, (np.ndarray,)): return dict(__ndarray__=obj.tolist(), dtype=str(obj.dtype), shape=obj.shape) return json.JSONEncoder.default(self, obj) def json_numpy_obj_hook(dct): if isinstance(dct, dict) and "__ndarray__" in dct: data = np.asarray(dct["__ndarray__"], dtype=dct["dtype"]) return data.reshape(dct["shape"]) return dct class Serializable: """ Implementation to read/write to file. 
All class the is inherited from this class needs to implement to_dict() and from_dict() """ @abstractclassmethod def from_dict(cls, dict_repr, *args, **kwargs): """ Read the object from an ordered dictionary :param dict_repr: the ordered dictionary that is used to construct the object :type dict_repr: OrderedDict :param args, kwargs: the arguments that need to be passed into from_dict() :type args, kwargs: additional arguments """ pass @abstractmethod def to_dict(self): """ Construct an ordered dictionary from the object :rtype: OrderedDict """ pass @classmethod def from_file(cls, path, *args, **kwargs): """ Read the object from a file (either .npy or .json) :param path: path of the file :type path: string :param args, kwargs: the arguments that need to be passed into from_dict() :type args, kwargs: additional arguments """ if path.endswith(".json"): with open(path, "r") as f: d = json.load(f, object_hook=json_numpy_obj_hook) elif path.endswith(".npy"): d = np.load(path, allow_pickle=True).item() else: assert False, "failed to load {} from {}".format(cls.__name__, path) assert d["__name__"] == cls.__name__, "the file belongs to {}, not {}".format( d["__name__"], cls.__name__ ) return cls.from_dict(d, *args, **kwargs) def to_file(self, path: str) -> None: """ Write the object to a file (either .npy or .json) :param path: path of the file :type path: string """ if os.path.dirname(path) != "" and not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) d = self.to_dict() d["__name__"] = self.__class__.__name__ if path.endswith(".json"): with open(path, "w") as f: json.dump(d, f, cls=NumpyEncoder, indent=4) elif path.endswith(".npy"): np.save(path, d) ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/backend/logger.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import logging

# Package-wide logger, shared by all poselib modules.
logger = logging.getLogger("poselib")
logger.setLevel(logging.INFO)

# Attach a stream handler only if none exists yet, so repeated imports or
# module reloads do not duplicate log output.  (Idiomatic truthiness check
# instead of `if not len(...)`.)
if not logger.handlers:
    formatter = logging.Formatter(
        fmt="%(asctime)-15s - %(levelname)s - %(module)s - %(message)s"
    )
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)

logger.info("logger initialized")

# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/rotation3d.py
# ================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from typing import List, Optional
import math
import torch

# Quaternions throughout this module use xyzw layout: q[..., :3] is the
# imaginary part and q[..., 3] is the real part.


@torch.jit.script
def quat_mul(a, b):
    """
    quaternion multiplication (Hamilton product a*b, xyzw layout)
    """
    x1, y1, z1, w1 = a[..., 0], a[..., 1], a[..., 2], a[..., 3]
    x2, y2, z2, w2 = b[..., 0], b[..., 1], b[..., 2], b[..., 3]

    w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
    x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
    y = w1 * y2 + y1 * w2 + z1 * x2 - x1 * z2
    z = w1 * z2 + z1 * w2 + x1 * y2 - y1 * x2

    return torch.stack([x, y, z, w], dim=-1)


@torch.jit.script
def quat_pos(x):
    """
    make all the real part of the quaternion positive
    (q and -q represent the same rotation)
    """
    q = x
    z = (q[..., 3:] < 0).float()
    q = (1 - 2 * z) * q
    return q


@torch.jit.script
def quat_abs(x):
    """
    quaternion norm (unit quaternion represents a 3D rotation, which has norm of 1)
    """
    x = x.norm(p=2, dim=-1)
    return x


@torch.jit.script
def quat_unit(x):
    """
    normalized quaternion with norm of 1
    """
    norm = quat_abs(x).unsqueeze(-1)
    # clamp avoids division by zero for degenerate inputs
    return x / (norm.clamp(min=1e-9))


@torch.jit.script
def quat_conjugate(x):
    """
    quaternion with its imaginary part negated
    """
    return torch.cat([-x[..., :3], x[..., 3:]], dim=-1)


@torch.jit.script
def quat_real(x):
    """
    real component of the quaternion
    """
    return x[..., 3]


@torch.jit.script
def quat_imaginary(x):
    """
    imaginary components of the quaternion
    """
    return x[..., :3]


@torch.jit.script
def quat_norm_check(x):
    """
    verify that a quaternion has norm 1 and a non-negative real part
    """
    # message typo fixed: was "the quaternion is has non-1 norm"
    assert bool(
        (abs(x.norm(p=2, dim=-1) - 1) < 1e-3).all()
    ), "the quaternion has non-1 norm: {}".format(abs(x.norm(p=2, dim=-1) - 1))
    assert bool((x[..., 3] >= 0).all()), "the quaternion has negative real part"


@torch.jit.script
def quat_normalize(q):
    """
    Construct 3D rotation from quaternion (the quaternion needs not to be normalized).
    """
    q = quat_unit(quat_pos(q))  # normalized to positive and unit quaternion
    return q


@torch.jit.script
def quat_from_xyz(xyz):
    """
    Construct a unit quaternion from its imaginary (xyz) component.

    The real part is recovered as w = sqrt(1 - |xyz|^2), so the input must
    have norm <= 1.

    BUGFIX: the previous implementation used `xyz.norm()` (which collapsed
    all batch dimensions) and computed w = 1 - |xyz| instead of
    sqrt(1 - |xyz|^2), so the result was not a unit quaternion.
    """
    norm_sq = (xyz * xyz).sum(dim=-1, keepdim=True)
    assert bool((norm_sq <= 1.0 + 1e-6).all()), "xyz has its norm greater than 1"
    w = (1.0 - norm_sq).clamp(min=0.0).sqrt()
    return torch.cat([xyz, w], dim=-1)


@torch.jit.script
def quat_identity(shape: List[int]):
    """
    Construct 3D identity rotation given shape
    """
    w = torch.ones(shape + [1])
    xyz = torch.zeros(shape + [3])
    q = torch.cat([xyz, w], dim=-1)
    return quat_normalize(q)


@torch.jit.script
def quat_from_angle_axis(angle, axis, degree: bool = False):
    """ Create a 3D rotation from angle and axis of rotation. The rotation is counter-clockwise
    along the axis.

    The rotation can be interpreted as a_R_b where frame "b" is the new frame that
    gets rotated counter-clockwise along the axis from frame "a"

    :param angle: angle of rotation
    :type angle: Tensor
    :param axis: axis of rotation
    :type axis: Tensor
    :param degree: put True here if the angle is given by degree
    :type degree: bool, optional, default=False
    """
    if degree:
        angle = angle / 180.0 * math.pi
    theta = (angle / 2).unsqueeze(-1)
    axis = axis / (axis.norm(p=2, dim=-1, keepdim=True).clamp(min=1e-9))
    xyz = axis * theta.sin()
    w = theta.cos()
    return quat_normalize(torch.cat([xyz, w], dim=-1))


@torch.jit.script
def quat_from_rotation_matrix(m):
    """
    Construct a 3D rotation from a valid 3x3 rotation matrices.
    Reference can be found here:
    http://www.cg.info.hiroshima-cu.ac.jp/~miyazaki/knowledge/teche52.html

    :param m: 3x3 orthogonal rotation matrices.
    :type m: Tensor

    :rtype: Tensor
    """
    m = m.unsqueeze(0)
    diag0 = m[..., 0, 0]
    diag1 = m[..., 1, 1]
    diag2 = m[..., 2, 2]

    # Math stuff.
    w = (((diag0 + diag1 + diag2 + 1.0) / 4.0).clamp(0.0, None)) ** 0.5
    x = (((diag0 - diag1 - diag2 + 1.0) / 4.0).clamp(0.0, None)) ** 0.5
    y = (((-diag0 + diag1 - diag2 + 1.0) / 4.0).clamp(0.0, None)) ** 0.5
    z = (((-diag0 - diag1 + diag2 + 1.0) / 4.0).clamp(0.0, None)) ** 0.5

    # Only modify quaternions where w > x, y, z.
    c0 = (w >= x) & (w >= y) & (w >= z)
    x[c0] *= (m[..., 2, 1][c0] - m[..., 1, 2][c0]).sign()
    y[c0] *= (m[..., 0, 2][c0] - m[..., 2, 0][c0]).sign()
    z[c0] *= (m[..., 1, 0][c0] - m[..., 0, 1][c0]).sign()

    # Only modify quaternions where x > w, y, z
    c1 = (x >= w) & (x >= y) & (x >= z)
    w[c1] *= (m[..., 2, 1][c1] - m[..., 1, 2][c1]).sign()
    y[c1] *= (m[..., 1, 0][c1] + m[..., 0, 1][c1]).sign()
    z[c1] *= (m[..., 0, 2][c1] + m[..., 2, 0][c1]).sign()

    # Only modify quaternions where y > w, x, z.
    c2 = (y >= w) & (y >= x) & (y >= z)
    w[c2] *= (m[..., 0, 2][c2] - m[..., 2, 0][c2]).sign()
    x[c2] *= (m[..., 1, 0][c2] + m[..., 0, 1][c2]).sign()
    z[c2] *= (m[..., 2, 1][c2] + m[..., 1, 2][c2]).sign()

    # Only modify quaternions where z > w, x, y.
    c3 = (z >= w) & (z >= x) & (z >= y)
    w[c3] *= (m[..., 1, 0][c3] - m[..., 0, 1][c3]).sign()
    x[c3] *= (m[..., 2, 0][c3] + m[..., 0, 2][c3]).sign()
    y[c3] *= (m[..., 2, 1][c3] + m[..., 1, 2][c3]).sign()

    return quat_normalize(torch.stack([x, y, z, w], dim=-1)).squeeze(0)


@torch.jit.script
def quat_mul_norm(x, y):
    """
    Combine two sets of 3D rotations together (quaternion product followed
    by normalization). The shapes need to be broadcastable.
    """
    return quat_normalize(quat_mul(x, y))


@torch.jit.script
def quat_rotate(rot, vec):
    """
    Rotate a 3D vector with the 3D rotation
    """
    other_q = torch.cat([vec, torch.zeros_like(vec[..., :1])], dim=-1)
    return quat_imaginary(quat_mul(quat_mul(rot, other_q), quat_conjugate(rot)))


@torch.jit.script
def quat_inverse(x):
    """
    The inverse of the rotation
    """
    return quat_conjugate(x)


@torch.jit.script
def quat_identity_like(x):
    """
    Construct identity 3D rotation with the same shape
    """
    return quat_identity(x.shape[:-1])


@torch.jit.script
def quat_angle_axis(x):
    """
    The (angle, axis) representation of the rotation. The axis is normalized to unit length.
    The angle is guaranteed to be between [0, pi].
    """
    s = 2 * (x[..., 3] ** 2) - 1
    angle = s.clamp(-1, 1).arccos()  # just to be safe
    axis = x[..., :3]
    # BUGFIX: normalize out-of-place; the previous in-place `/=` silently
    # mutated the caller's quaternion through the view returned by slicing.
    axis = axis / axis.norm(p=2, dim=-1, keepdim=True).clamp(min=1e-9)
    return angle, axis


@torch.jit.script
def quat_yaw_rotation(x, z_up: bool = True):
    """
    Yaw rotation (rotation along z-axis)
    """
    q = x
    if z_up:
        q = torch.cat([torch.zeros_like(q[..., 0:2]), q[..., 2:3], q[..., 3:]], dim=-1)
    else:
        q = torch.cat(
            [
                torch.zeros_like(q[..., 0:1]),
                q[..., 1:2],
                torch.zeros_like(q[..., 2:3]),
                q[..., 3:4],
            ],
            dim=-1,
        )
    return quat_normalize(q)


@torch.jit.script
def transform_from_rotation_translation(
    r: Optional[torch.Tensor] = None, t: Optional[torch.Tensor] = None
):
    """
    Construct a transform from a quaternion and 3D translation. Only one of them can be None.
    A transform is stored as [qx, qy, qz, qw, tx, ty, tz] along the last dim.
    """
    assert r is not None or t is not None, "rotation and translation can't be all None"
    if r is None:
        assert t is not None
        r = quat_identity(list(t.shape))
    if t is None:
        t = torch.zeros(list(r.shape) + [3])
    return torch.cat([r, t], dim=-1)


@torch.jit.script
def transform_identity(shape: List[int]):
    """
    Identity transformation with given shape
    """
    r = quat_identity(shape)
    t = torch.zeros(shape + [3])
    return transform_from_rotation_translation(r, t)


@torch.jit.script
def transform_rotation(x):
    """Get rotation from transform"""
    return x[..., :4]


@torch.jit.script
def transform_translation(x):
    """Get translation from transform"""
    return x[..., 4:]


@torch.jit.script
def transform_inverse(x):
    """
    Inverse transformation
    """
    inv_so3 = quat_inverse(transform_rotation(x))
    return transform_from_rotation_translation(
        r=inv_so3, t=quat_rotate(inv_so3, -transform_translation(x))
    )


@torch.jit.script
def transform_identity_like(x):
    """
    identity transformation with the same shape
    """
    return transform_identity(x.shape)


@torch.jit.script
def transform_mul(x, y):
    """
    Combine two transformation together
    """
    z = transform_from_rotation_translation(
        r=quat_mul_norm(transform_rotation(x), transform_rotation(y)),
        t=quat_rotate(transform_rotation(x), transform_translation(y))
        + transform_translation(x),
    )
    return z


@torch.jit.script
def transform_apply(rot, vec):
    """
    Transform a 3D vector
    :param rot: transform
    :type rot: Tensor
    :param vec: 3D vector
    :type vec: Tensor
    """
    assert isinstance(vec, torch.Tensor)
    return quat_rotate(transform_rotation(rot), vec) + transform_translation(rot)


@torch.jit.script
def rot_matrix_det(x):
    """
    Return the determinant of the 3x3 matrix. The shape of the tensor will be as same as the
    shape of the matrix
    """
    a, b, c = x[..., 0, 0], x[..., 0, 1], x[..., 0, 2]
    d, e, f = x[..., 1, 0], x[..., 1, 1], x[..., 1, 2]
    g, h, i = x[..., 2, 0], x[..., 2, 1], x[..., 2, 2]
    t1 = a * (e * i - f * h)
    t2 = b * (d * i - f * g)
    t3 = c * (d * h - e * g)
    return t1 - t2 + t3


@torch.jit.script
def rot_matrix_integrity_check(x):
    """
    Verify that a rotation matrix has a determinant of one and is orthogonal

    BUGFIX: the previous implementation called `x.permute(torch.arange(...))`
    (permute takes ints, not a tensor) and `rtr.zeros_like()` (not a Tensor
    method), and compared the signed difference instead of its magnitude.
    """
    det = rot_matrix_det(x)
    assert bool((abs(det - 1) < 1e-3).all()), "the matrix has non-one determinant"
    # R @ R^T must equal the identity for an orthogonal matrix.
    rtr = x @ x.transpose(-1, -2)
    rtr_gt = torch.zeros_like(rtr)
    rtr_gt[..., 0, 0] = 1
    rtr_gt[..., 1, 1] = 1
    rtr_gt[..., 2, 2] = 1
    assert bool(((rtr - rtr_gt).abs() < 1e-3).all()), "the matrix is not orthogonal"


@torch.jit.script
def rot_matrix_from_quaternion(q):
    """
    Construct rotation matrix from quaternion
    """
    # Shortcuts for individual elements (using wikipedia's convention)
    qi, qj, qk, qr = q[..., 0], q[..., 1], q[..., 2], q[..., 3]

    # Set individual elements
    R00 = 1.0 - 2.0 * (qj ** 2 + qk ** 2)
    R01 = 2 * (qi * qj - qk * qr)
    R02 = 2 * (qi * qk + qj * qr)
    R10 = 2 * (qi * qj + qk * qr)
    R11 = 1.0 - 2.0 * (qi ** 2 + qk ** 2)
    R12 = 2 * (qj * qk - qi * qr)
    R20 = 2 * (qi * qk - qj * qr)
    R21 = 2 * (qj * qk + qi * qr)
    R22 = 1.0 - 2.0 * (qi ** 2 + qj ** 2)

    R0 = torch.stack([R00, R01, R02], dim=-1)
    R1 = torch.stack([R10, R11, R12], dim=-1)
    # BUGFIX: the last row previously reused R10 instead of R20, producing a
    # non-orthogonal matrix for most rotations.
    R2 = torch.stack([R20, R21, R22], dim=-1)

    R = torch.stack([R0, R1, R2], dim=-2)
    return R


@torch.jit.script
def euclidean_to_rotation_matrix(x):
    """
    Get the rotation matrix on the top-left corner of a Euclidean transformation matrix
    """
    return x[..., :3, :3]


@torch.jit.script
def euclidean_integrity_check(x):
    euclidean_to_rotation_matrix(x)  # check 3d-rotation matrix
    assert bool((x[..., 3, :3] == 0).all()), "the last row is illegal"
    assert bool((x[..., 3, 3] == 1).all()), "the last row is illegal"


@torch.jit.script
def euclidean_translation(x):
    """
    Get the translation vector located at the last column of the matrix
    """
    return x[..., :3, 3]


@torch.jit.script
def euclidean_inverse(x):
    """
    Compute the 4x4 matrix that represents the inverse Euclidean transform.

    BUGFIX: the previous implementation called the nonexistent method
    `x.zeros_like()`, assigned a 4-element quaternion into the 3x3 rotation
    slot, indexed column 4 of a 4x4 matrix, and never set the homogeneous 1
    in the bottom-right corner.
    """
    inv = torch.zeros_like(x)
    irot = quat_inverse(quat_from_rotation_matrix(euclidean_to_rotation_matrix(x)))
    inv[..., :3, :3] = rot_matrix_from_quaternion(irot)
    inv[..., :3, 3] = quat_rotate(irot, -euclidean_translation(x))
    inv[..., 3, 3] = 1.0
    return inv


@torch.jit.script
def euclidean_to_transform(transformation_matrix):
    """
    Construct a transform from a Euclidean transformation matrix
    """
    return transform_from_rotation_translation(
        r=quat_from_rotation_matrix(
            m=euclidean_to_rotation_matrix(transformation_matrix)
        ),
        t=euclidean_translation(transformation_matrix),
    )

# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/tensor_utils.py
# ================================================
# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
from collections import OrderedDict
from .backend import Serializable
import torch


class TensorUtils(Serializable):
    """Serialization shim that rebuilds torch tensors from their dict form."""

    @classmethod
    def from_dict(cls, dict_repr, *args, **kwargs):
        """Rebuild a torch tensor from its dictionary representation.

        :param dict_repr: the ordered dictionary that is used to construct the object
        :type dict_repr: OrderedDict
        :param kwargs: the arguments that need to be passed into from_dict()
        :type kwargs: additional arguments
        """
        raw = dict_repr["arr"]
        target_dtype = dict_repr["context"]["dtype"]
        return torch.from_numpy(raw.astype(target_dtype))

    def to_dict(self):
        """Serialization to a dictionary is not supported on this class.

        :rtype: OrderedDict
        """
        return NotImplemented


def tensor_to_dict(x):
    """Convert a torch tensor into the dict form understood by from_dict().

    :rtype: OrderedDict
    """
    arr = x.numpy()
    return {"arr": arr, "context": {"dtype": arr.dtype.name}}

# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/tests/__init__.py
# ================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/tests/test_rotation.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Manual smoke-test script for poselib.core.rotation3d: prints intermediate
# values and asserts a few invariants.  Not a pytest module; run directly.

from ..rotation3d import *
import numpy as np
import torch

# Normalize a batch of raw quaternions and rotate two basis-ish vectors.
q = torch.from_numpy(np.array([[0, 1, 2, 3], [-2, 3, -1, 5]], dtype=np.float32))
print("q", q)
r = quat_normalize(q)
x = torch.from_numpy(np.array([[1, 0, 0], [0, -1, 0]], dtype=np.float32))
print(r)
print(quat_rotate(r, x))

# Random angle/axis rotation: rotating by rot then by its inverse must be
# the identity on a random (5, 6, 3) batch of vectors.
angle = torch.from_numpy(np.array(np.random.rand() * 10.0, dtype=np.float32))
axis = torch.from_numpy(
    np.array([1, np.random.rand() * 10.0, np.random.rand() * 10.0], dtype=np.float32),
)

print(repr(angle))
print(repr(axis))

rot = quat_from_angle_axis(angle, axis)
x = torch.from_numpy(np.random.rand(5, 6, 3))
y = quat_rotate(quat_inverse(rot), quat_rotate(rot, x))
print(x.numpy())
print(y.numpy())
assert np.allclose(x.numpy(), y.numpy())

# Build a transform from a rotation matrix + translation and apply it.
m = torch.from_numpy(np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]], dtype=np.float32))
r = quat_from_rotation_matrix(m)
t = torch.from_numpy(np.array([0, 1, 0], dtype=np.float32))
se3 = transform_from_rotation_translation(r=r, t=t)
print(se3)
print(transform_apply(se3, t))

# A transform composed with its own inverse should be the identity transform.
rot = quat_from_angle_axis(
    torch.from_numpy(np.array([45, -54], dtype=np.float32)),
    torch.from_numpy(np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32)),
    degree=True,
)
trans = torch.from_numpy(np.array([[1, 1, 0], [1, 1, 0]], dtype=np.float32))
transform = transform_from_rotation_translation(r=rot, t=trans)
t = transform_mul(transform, transform_inverse(transform))
gt = np.zeros((2, 7))
gt[:, 0] = 1.0
print(t.numpy())
print(gt)
# NOTE(review): transforms are laid out [qx, qy, qz, qw, tx, ty, tz], so the
# identity is (0, 0, 0, 1, 0, 0, 0); gt[:, 0] = 1.0 puts the 1 in the qx
# slot, which looks misplaced (w lives at index 3) — presumably why the
# assert below was left commented out.  Verify before re-enabling.
# assert np.allclose(t.numpy(), gt)

# Round-trip a 4x4 homogeneous matrix through euclidean_to_transform.
transform2 = torch.from_numpy(
    np.array(
        [[1, 0, 0, 1], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]], dtype=np.float32
    ),
)
transform2 = euclidean_to_transform(transform2)
print(transform2)

# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/__init__.py
# ================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/backend/__init__.py ================================================ ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/backend/fbx/__init__.py ================================================ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/backend/fbx/fbx_backend.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ This script reads an fbx file and returns the joint names, parents, and transforms. NOTE: It requires the Python FBX package to be installed. """ import sys import numpy as np try: import fbx import FbxCommon except ImportError as e: print("Error: FBX library failed to load - importing FBX data will not succeed. Message: {}".format(e)) print("FBX tools must be installed from https://help.autodesk.com/view/FBX/2020/ENU/?guid=FBX_Developer_Help_scripting_with_python_fbx_installing_python_fbx_html") def fbx_to_npy(file_name_in, root_joint_name, fps): """ This function reads in an fbx file, and saves the relevant info to a numpy array Fbx files have a series of animation curves, each of which has animations at different times. This script assumes that for mocap data, there is only one animation curve that contains all the joints. Otherwise it is unclear how to read in the data. If this condition isn't met, then the method throws an error :param file_name_in: str, file path in. Should be .fbx file :return: nothing, it just writes a file. 
""" # Create the fbx scene object and load the .fbx file fbx_sdk_manager, fbx_scene = FbxCommon.InitializeSdkObjects() FbxCommon.LoadScene(fbx_sdk_manager, fbx_scene, file_name_in) """ To read in the animation, we must find the root node of the skeleton. Unfortunately fbx files can have "scene parents" and other parts of the tree that are not joints As a crude fix, this reader just takes and finds the first thing which has an animation curve attached """ search_root = (root_joint_name is None or root_joint_name == "") # Get the root node of the skeleton, which is the child of the scene's root node possible_root_nodes = [fbx_scene.GetRootNode()] found_root_node = False max_key_count = 0 root_joint = None while len(possible_root_nodes) > 0: joint = possible_root_nodes.pop(0) if not search_root: if joint.GetName() == root_joint_name: root_joint = joint try: curve = _get_animation_curve(joint, fbx_scene) except RuntimeError: curve = None if curve is not None: key_count = curve.KeyGetCount() if key_count > max_key_count: found_root_node = True max_key_count = key_count root_curve = curve if search_root and not root_joint: root_joint = joint for child_index in range(joint.GetChildCount()): possible_root_nodes.append(joint.GetChild(child_index)) if not found_root_node: raise RuntimeError("No root joint found!! 
Exiting") joint_list, joint_names, parents = _get_skeleton(root_joint) """ Read in the transformation matrices of the animation, taking the scaling into account """ anim_range, frame_count, frame_rate = _get_frame_count(fbx_scene) local_transforms = [] #for frame in range(frame_count): time_sec = anim_range.GetStart().GetSecondDouble() time_range_sec = anim_range.GetStop().GetSecondDouble() - time_sec fbx_fps = frame_count / time_range_sec if fps != 120: fbx_fps = fps print("FPS: ", fbx_fps) while time_sec < anim_range.GetStop().GetSecondDouble(): fbx_time = fbx.FbxTime() fbx_time.SetSecondDouble(time_sec) fbx_time = fbx_time.GetFramedTime() transforms_current_frame = [] # Fbx has a unique time object which you need #fbx_time = root_curve.KeyGetTime(frame) for joint in joint_list: arr = np.array(_recursive_to_list(joint.EvaluateLocalTransform(fbx_time))) scales = np.array(_recursive_to_list(joint.EvaluateLocalScaling(fbx_time))) if not np.allclose(scales[0:3], scales[0]): raise ValueError( "Different X, Y and Z scaling. Unsure how this should be handled. " "To solve this, look at this link and try to upgrade the script " "http://help.autodesk.com/view/FBX/2017/ENU/?guid=__files_GUID_10CDD" "63C_79C1_4F2D_BB28_AD2BE65A02ED_htm" ) # Adjust the array for scaling arr /= scales[0] arr[3, 3] = 1.0 transforms_current_frame.append(arr) local_transforms.append(transforms_current_frame) time_sec += (1.0/fbx_fps) local_transforms = np.array(local_transforms) print("Frame Count: ", len(local_transforms)) return joint_names, parents, local_transforms, fbx_fps def _get_frame_count(fbx_scene): # Get the animation stacks and layers, in order to pull off animation curves later num_anim_stacks = fbx_scene.GetSrcObjectCount( FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimStack.ClassId) ) # if num_anim_stacks != 1: # raise RuntimeError( # "More than one animation stack was found. " # "This script must be modified to handle this case. 
Exiting" # ) if num_anim_stacks > 1: index = 1 else: index = 0 anim_stack = fbx_scene.GetSrcObject( FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimStack.ClassId), index ) anim_range = anim_stack.GetLocalTimeSpan() duration = anim_range.GetDuration() fps = duration.GetFrameRate(duration.GetGlobalTimeMode()) frame_count = duration.GetFrameCount(True) return anim_range, frame_count, fps def _get_animation_curve(joint, fbx_scene): # Get the animation stacks and layers, in order to pull off animation curves later num_anim_stacks = fbx_scene.GetSrcObjectCount( FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimStack.ClassId) ) # if num_anim_stacks != 1: # raise RuntimeError( # "More than one animation stack was found. " # "This script must be modified to handle this case. Exiting" # ) if num_anim_stacks > 1: index = 1 else: index = 0 anim_stack = fbx_scene.GetSrcObject( FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimStack.ClassId), index ) num_anim_layers = anim_stack.GetSrcObjectCount( FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimLayer.ClassId) ) if num_anim_layers != 1: raise RuntimeError( "More than one animation layer was found. " "This script must be modified to handle this case. 
Exiting" ) animation_layer = anim_stack.GetSrcObject( FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimLayer.ClassId), 0 ) def _check_longest_curve(curve, max_curve_key_count): longest_curve = None if curve and curve.KeyGetCount() > max_curve_key_count[0]: max_curve_key_count[0] = curve.KeyGetCount() return True return False max_curve_key_count = [0] longest_curve = None for c in ["X", "Y", "Z"]: curve = joint.LclTranslation.GetCurve( animation_layer, c ) # sample curve for translation if _check_longest_curve(curve, max_curve_key_count): longest_curve = curve curve = joint.LclRotation.GetCurve( animation_layer, "X" ) if _check_longest_curve(curve, max_curve_key_count): longest_curve = curve return longest_curve def _get_skeleton(root_joint): # Do a depth first search of the skeleton to extract all the joints joint_list = [root_joint] joint_names = [root_joint.GetName()] parents = [-1] # -1 means no parent def append_children(joint, pos): """ Depth first search function :param joint: joint item in the fbx :param pos: position of current element (for parenting) :return: Nothing """ for child_index in range(joint.GetChildCount()): child = joint.GetChild(child_index) joint_list.append(child) joint_names.append(child.GetName()) parents.append(pos) append_children(child, len(parents) - 1) append_children(root_joint, 0) return joint_list, joint_names, parents def _recursive_to_list(array): """ Takes some iterable that might contain iterables and converts it to a list of lists [of lists... 
etc] Mainly used for converting the strange fbx wrappers for c++ arrays into python lists :param array: array to be converted :return: array converted to lists """ try: return float(array) except TypeError: return [_recursive_to_list(a) for a in array] def parse_fbx(file_name_in, root_joint_name, fps): return fbx_to_npy(file_name_in, root_joint_name, fps) ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/backend/fbx/fbx_read_wrapper.py ================================================ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. """ Script that reads in fbx files from python This requires a configs file, which contains the command necessary to switch conda environments to run the fbx reading script from python """ from ....core import logger import inspect import os import numpy as np from .fbx_backend import parse_fbx def fbx_to_array(fbx_file_path, root_joint, fps): """ Reads an fbx file to an array. :param fbx_file_path: str, file path to fbx :return: tuple with joint_names, parents, transforms, frame time """ # Ensure the file path is valid fbx_file_path = os.path.abspath(fbx_file_path) assert os.path.exists(fbx_file_path) # Parse FBX file joint_names, parents, local_transforms, fbx_fps = parse_fbx(fbx_file_path, root_joint, fps) return joint_names, parents, local_transforms, fbx_fps ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/skeleton3d.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os import xml.etree.ElementTree as ET from collections import OrderedDict from typing import List, Optional, Type, Dict import numpy as np import torch from ..core import * from .backend.fbx.fbx_read_wrapper import fbx_to_array import scipy.ndimage.filters as filters class SkeletonTree(Serializable): """ A skeleton tree gives a complete description of a rigid skeleton. It describes a tree structure over a list of nodes with their names indicated by strings. 
Each edge in the tree has a local translation associated with it which describes the distance between the two nodes that it connects. Basic Usage: >>> t = SkeletonTree.from_mjcf(SkeletonTree.__example_mjcf_path__) >>> t SkeletonTree( node_names=['torso', 'front_left_leg', 'aux_1', 'front_left_foot', 'front_right_leg', 'aux_2', 'front_right_foot', 'left_back_leg', 'aux_3', 'left_back_foot', 'right_back_leg', 'aux_4', 'right_back_foot'], parent_indices=tensor([-1, 0, 1, 2, 0, 4, 5, 0, 7, 8, 0, 10, 11]), local_translation=tensor([[ 0.0000, 0.0000, 0.7500], [ 0.0000, 0.0000, 0.0000], [ 0.2000, 0.2000, 0.0000], [ 0.2000, 0.2000, 0.0000], [ 0.0000, 0.0000, 0.0000], [-0.2000, 0.2000, 0.0000], [-0.2000, 0.2000, 0.0000], [ 0.0000, 0.0000, 0.0000], [-0.2000, -0.2000, 0.0000], [-0.2000, -0.2000, 0.0000], [ 0.0000, 0.0000, 0.0000], [ 0.2000, -0.2000, 0.0000], [ 0.2000, -0.2000, 0.0000]]) ) >>> t.node_names ['torso', 'front_left_leg', 'aux_1', 'front_left_foot', 'front_right_leg', 'aux_2', 'front_right_foot', 'left_back_leg', 'aux_3', 'left_back_foot', 'right_back_leg', 'aux_4', 'right_back_foot'] >>> t.parent_indices tensor([-1, 0, 1, 2, 0, 4, 5, 0, 7, 8, 0, 10, 11]) >>> t.local_translation tensor([[ 0.0000, 0.0000, 0.7500], [ 0.0000, 0.0000, 0.0000], [ 0.2000, 0.2000, 0.0000], [ 0.2000, 0.2000, 0.0000], [ 0.0000, 0.0000, 0.0000], [-0.2000, 0.2000, 0.0000], [-0.2000, 0.2000, 0.0000], [ 0.0000, 0.0000, 0.0000], [-0.2000, -0.2000, 0.0000], [-0.2000, -0.2000, 0.0000], [ 0.0000, 0.0000, 0.0000], [ 0.2000, -0.2000, 0.0000], [ 0.2000, -0.2000, 0.0000]]) >>> t.parent_of('front_left_leg') 'torso' >>> t.index('front_right_foot') 6 >>> t[2] 'aux_1' """ __example_mjcf_path__ = os.path.join( os.path.dirname(os.path.realpath(__file__)), "tests/ant.xml" ) def __init__(self, node_names, parent_indices, local_translation): """ :param node_names: a list of names for each tree node :type node_names: List[str] :param parent_indices: an int32-typed tensor that represents the edge to its parent.\ 
-1 represents the root node :type parent_indices: Tensor :param local_translation: a 3d vector that gives local translation information :type local_translation: Tensor """ ln, lp, ll = len(node_names), len(parent_indices), len(local_translation) assert len(set((ln, lp, ll))) == 1 self._node_names = node_names self._parent_indices = parent_indices.long() self._local_translation = local_translation self._node_indices = {self.node_names[i]: i for i in range(len(self))} def __len__(self): """ number of nodes in the skeleton tree """ return len(self.node_names) def __iter__(self): """ iterator that iterate through the name of each node """ yield from self.node_names def __getitem__(self, item): """ get the name of the node given the index """ return self.node_names[item] def __repr__(self): return ( "SkeletonTree(\n node_names={},\n parent_indices={}," "\n local_translation={}\n)".format( self._indent(repr(self.node_names)), self._indent(repr(self.parent_indices)), self._indent(repr(self.local_translation)), ) ) def _indent(self, s): return "\n ".join(s.split("\n")) @property def node_names(self): return self._node_names @property def parent_indices(self): return self._parent_indices @property def local_translation(self): return self._local_translation @property def num_joints(self): """ number of nodes in the skeleton tree """ return len(self) @classmethod def from_dict(cls, dict_repr, *args, **kwargs): return cls( list(map(str, dict_repr["node_names"])), TensorUtils.from_dict(dict_repr["parent_indices"], *args, **kwargs), TensorUtils.from_dict(dict_repr["local_translation"], *args, **kwargs), ) def to_dict(self): return OrderedDict( [ ("node_names", self.node_names), ("parent_indices", tensor_to_dict(self.parent_indices)), ("local_translation", tensor_to_dict(self.local_translation)), ] ) @classmethod def from_mjcf(cls, path: str) -> "SkeletonTree": """ Parses a mujoco xml scene description file and returns a Skeleton Tree. 
We use the model attribute at the root as the name of the tree. :param path: :type path: string :return: The skeleton tree constructed from the mjcf file :rtype: SkeletonTree """ tree = ET.parse(path) xml_doc_root = tree.getroot() xml_world_body = xml_doc_root.find("worldbody") if xml_world_body is None: raise ValueError("MJCF parsed incorrectly please verify it.") # assume this is the root xml_body_root = xml_world_body.find("body") if xml_body_root is None: raise ValueError("MJCF parsed incorrectly please verify it.") node_names = [] parent_indices = [] local_translation = [] # recursively adding all nodes into the skel_tree def _add_xml_node(xml_node, parent_index, node_index): node_name = xml_node.attrib.get("name") # parse the local translation into float list pos = np.fromstring(xml_node.attrib.get("pos"), dtype=float, sep=" ") node_names.append(node_name) parent_indices.append(parent_index) local_translation.append(pos) curr_index = node_index node_index += 1 for next_node in xml_node.findall("body"): node_index = _add_xml_node(next_node, curr_index, node_index) return node_index _add_xml_node(xml_body_root, -1, 0) return cls( node_names, torch.from_numpy(np.array(parent_indices, dtype=np.int32)), torch.from_numpy(np.array(local_translation, dtype=np.float32)), ) def parent_of(self, node_name): """ get the name of the parent of the given node :param node_name: the name of the node :type node_name: string :rtype: string """ return self[int(self.parent_indices[self.index(node_name)].item())] def index(self, node_name): """ get the index of the node :param node_name: the name of the node :type node_name: string :rtype: int """ return self._node_indices[node_name] def drop_nodes_by_names( self, node_names: List[str], pairwise_translation=None ) -> "SkeletonTree": new_length = len(self) - len(node_names) new_node_names = [] new_local_translation = torch.zeros( new_length, 3, dtype=self.local_translation.dtype ) new_parent_indices = torch.zeros(new_length, 
dtype=self.parent_indices.dtype) parent_indices = self.parent_indices.numpy() new_node_indices: dict = {} new_node_index = 0 for node_index in range(len(self)): if self[node_index] in node_names: continue tb_node_index = parent_indices[node_index] if tb_node_index != -1: local_translation = self.local_translation[node_index, :] while tb_node_index != -1 and self[tb_node_index] in node_names: local_translation += self.local_translation[tb_node_index, :] tb_node_index = parent_indices[tb_node_index] assert tb_node_index != -1, "the root node cannot be dropped" if pairwise_translation is not None: local_translation = pairwise_translation[ tb_node_index, node_index, : ] else: local_translation = self.local_translation[node_index, :] new_node_names.append(self[node_index]) new_local_translation[new_node_index, :] = local_translation if tb_node_index == -1: new_parent_indices[new_node_index] = -1 else: new_parent_indices[new_node_index] = new_node_indices[ self[tb_node_index] ] new_node_indices[self[node_index]] = new_node_index new_node_index += 1 return SkeletonTree(new_node_names, new_parent_indices, new_local_translation) def keep_nodes_by_names( self, node_names: List[str], pairwise_translation=None ) -> "SkeletonTree": nodes_to_drop = list(filter(lambda x: x not in node_names, self)) return self.drop_nodes_by_names(nodes_to_drop, pairwise_translation) class SkeletonState(Serializable): """ A skeleton state contains all the information needed to describe a static state of a skeleton. It requires a skeleton tree, local/global rotation at each joint and the root translation. 
Example: >>> t = SkeletonTree.from_mjcf(SkeletonTree.__example_mjcf_path__) >>> zero_pose = SkeletonState.zero_pose(t) >>> plot_skeleton_state(zero_pose) # can be imported from `.visualization.common` [plot of the ant at zero pose >>> local_rotation = zero_pose.local_rotation.clone() >>> local_rotation[2] = torch.tensor([0, 0, 1, 0]) >>> new_pose = SkeletonState.from_rotation_and_root_translation( ... skeleton_tree=t, ... r=local_rotation, ... t=zero_pose.root_translation, ... is_local=True ... ) >>> new_pose.local_rotation tensor([[0., 0., 0., 1.], [0., 0., 0., 1.], [0., 1., 0., 0.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.]]) >>> plot_skeleton_state(new_pose) # you should be able to see one of ant's leg is bent [plot of the ant with the new pose >>> new_pose.global_rotation # the local rotation is propagated to the global rotation at joint #3 tensor([[0., 0., 0., 1.], [0., 0., 0., 1.], [0., 1., 0., 0.], [0., 1., 0., 0.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.]]) Global/Local Representation (cont. 
from the previous example) >>> new_pose.is_local True >>> new_pose.tensor # this will return the local rotation followed by the root translation tensor([0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0.]) >>> new_pose.tensor.shape # 4 * 13 (joint rotation) + 3 (root translatio torch.Size([55]) >>> new_pose.global_repr().is_local False >>> new_pose.global_repr().tensor # this will return the global rotation followed by the root translation instead tensor([0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0.]) >>> new_pose.global_repr().tensor.shape # 4 * 13 (joint rotation) + 3 (root translation torch.Size([55]) """ def __init__(self, tensor_backend, skeleton_tree, is_local): self._skeleton_tree = skeleton_tree self._is_local = is_local self.tensor = tensor_backend.clone() def __len__(self): return self.tensor.shape[0] @property def rotation(self): if not hasattr(self, "_rotation"): self._rotation = self.tensor[..., : self.num_joints * 4].reshape( *(self.tensor.shape[:-1] + (self.num_joints, 4)) ) return self._rotation @property def _local_rotation(self): if self._is_local: return self.rotation else: return None @property def _global_rotation(self): if not self._is_local: return self.rotation else: return None @property def is_local(self): """ is the rotation represented in local frame? 
:rtype: bool """ return self._is_local @property def invariant_property(self): return {"skeleton_tree": self.skeleton_tree, "is_local": self.is_local} @property def num_joints(self): """ number of joints in the skeleton tree :rtype: int """ return self.skeleton_tree.num_joints @property def skeleton_tree(self): """ skeleton tree :rtype: SkeletonTree """ return self._skeleton_tree @property def root_translation(self): """ root translation :rtype: Tensor """ if not hasattr(self, "_root_translation"): self._root_translation = self.tensor[ ..., self.num_joints * 4 : self.num_joints * 4 + 3 ] return self._root_translation @property def global_transformation(self): """ global transformation of each joint (transform from joint frame to global frame) """ if not hasattr(self, "_global_transformation"): local_transformation = self.local_transformation global_transformation = [] parent_indices = self.skeleton_tree.parent_indices.numpy() # global_transformation = local_transformation.identity_like() for node_index in range(len(self.skeleton_tree)): parent_index = parent_indices[node_index] if parent_index == -1: global_transformation.append( local_transformation[..., node_index, :] ) else: global_transformation.append( transform_mul( global_transformation[parent_index], local_transformation[..., node_index, :], ) ) self._global_transformation = torch.stack(global_transformation, axis=-2) return self._global_transformation @property def global_rotation(self): """ global rotation of each joint (rotation matrix to rotate from joint's F.O.R to global F.O.R) """ if self._global_rotation is None: if not hasattr(self, "_comp_global_rotation"): self._comp_global_rotation = transform_rotation( self.global_transformation ) return self._comp_global_rotation else: return self._global_rotation @property def global_translation(self): """ global translation of each joint """ if not hasattr(self, "_global_translation"): self._global_translation = 
transform_translation(self.global_transformation) return self._global_translation @property def global_translation_xy(self): """ global translation in xy """ trans_xy_data = self.global_translation.zeros_like() trans_xy_data[..., 0:2] = self.global_translation[..., 0:2] return trans_xy_data @property def global_translation_xz(self): """ global translation in xz """ trans_xz_data = self.global_translation.zeros_like() trans_xz_data[..., 0:1] = self.global_translation[..., 0:1] trans_xz_data[..., 2:3] = self.global_translation[..., 2:3] return trans_xz_data @property def local_rotation(self): """ the rotation from child frame to parent frame given in the order of child nodes appeared in `.skeleton_tree.node_names` """ if self._local_rotation is None: if not hasattr(self, "_comp_local_rotation"): local_rotation = quat_identity_like(self.global_rotation) for node_index in range(len(self.skeleton_tree)): parent_index = self.skeleton_tree.parent_indices[node_index] if parent_index == -1: local_rotation[..., node_index, :] = self.global_rotation[ ..., node_index, : ] else: local_rotation[..., node_index, :] = quat_mul_norm( quat_inverse(self.global_rotation[..., parent_index, :]), self.global_rotation[..., node_index, :], ) self._comp_local_rotation = local_rotation return self._comp_local_rotation else: return self._local_rotation @property def local_transformation(self): """ local translation + local rotation. It describes the transformation from child frame to parent frame given in the order of child nodes appeared in `.skeleton_tree.node_names` """ if not hasattr(self, "_local_transformation"): self._local_transformation = transform_from_rotation_translation( r=self.local_rotation, t=self.local_translation ) return self._local_transformation @property def local_translation(self): """ local translation of the skeleton state. It is identical to the local translation in `.skeleton_tree.local_translation` except the root translation. 
The root translation is identical to `.root_translation` """ if not hasattr(self, "_local_translation"): broadcast_shape = ( tuple(self.tensor.shape[:-1]) + (len(self.skeleton_tree),) + tuple(self.skeleton_tree.local_translation.shape[-1:]) ) local_translation = self.skeleton_tree.local_translation.broadcast_to( *broadcast_shape ).clone() local_translation[..., 0, :] = self.root_translation self._local_translation = local_translation return self._local_translation # Root Properties @property def root_translation_xy(self): """ root translation on xy """ if not hasattr(self, "_root_translation_xy"): self._root_translation_xy = self.global_translation_xy[..., 0, :] return self._root_translation_xy @property def global_root_rotation(self): """ root rotation """ if not hasattr(self, "_global_root_rotation"): self._global_root_rotation = self.global_rotation[..., 0, :] return self._global_root_rotation @property def global_root_yaw_rotation(self): """ root yaw rotation """ if not hasattr(self, "_global_root_yaw_rotation"): self._global_root_yaw_rotation = self.global_root_rotation.yaw_rotation() return self._global_root_yaw_rotation # Properties relative to root @property def local_translation_to_root(self): """ The 3D translation from joint frame to the root frame. """ if not hasattr(self, "_local_translation_to_root"): self._local_translation_to_root = ( self.global_translation - self.root_translation.unsqueeze(-1) ) return self._local_translation_to_root @property def local_rotation_to_root(self): """ The 3D rotation from joint frame to the root frame. 
It is equivalent to The root_R_world * world_R_node """ return ( quat_inverse(self.global_root_rotation).unsqueeze(-1) * self.global_rotation ) def compute_forward_vector( self, left_shoulder_index, right_shoulder_index, left_hip_index, right_hip_index, gaussian_filter_width=20, ): """ Computes forward vector based on cross product of the up vector with average of the right->left shoulder and hip vectors """ global_positions = self.global_translation # Perpendicular to the forward direction. # Uses the shoulders and hips to find this. side_direction = ( global_positions[:, left_shoulder_index].numpy() - global_positions[:, right_shoulder_index].numpy() + global_positions[:, left_hip_index].numpy() - global_positions[:, right_hip_index].numpy() ) side_direction = ( side_direction / np.sqrt((side_direction ** 2).sum(axis=-1))[..., np.newaxis] ) # Forward direction obtained by crossing with the up direction. forward_direction = np.cross(side_direction, np.array([[0, 1, 0]])) # Smooth the forward direction with a Gaussian. # Axis 0 is the time/frame axis. 
forward_direction = filters.gaussian_filter1d( forward_direction, gaussian_filter_width, axis=0, mode="nearest" ) forward_direction = ( forward_direction / np.sqrt((forward_direction ** 2).sum(axis=-1))[..., np.newaxis] ) return torch.from_numpy(forward_direction) @staticmethod def _to_state_vector(rot, rt): state_shape = rot.shape[:-2] vr = rot.reshape(*(state_shape + (-1,))) vt = rt.broadcast_to(*state_shape + rt.shape[-1:]).reshape( *(state_shape + (-1,)) ) v = torch.cat([vr, vt], axis=-1) return v @classmethod def from_dict( cls: Type["SkeletonState"], dict_repr: OrderedDict, *args, **kwargs ) -> "SkeletonState": rot = TensorUtils.from_dict(dict_repr["rotation"], *args, **kwargs) rt = TensorUtils.from_dict(dict_repr["root_translation"], *args, **kwargs) return cls( SkeletonState._to_state_vector(rot, rt), SkeletonTree.from_dict(dict_repr["skeleton_tree"], *args, **kwargs), dict_repr["is_local"], ) def to_dict(self) -> OrderedDict: return OrderedDict( [ ("rotation", tensor_to_dict(self.rotation)), ("root_translation", tensor_to_dict(self.root_translation)), ("skeleton_tree", self.skeleton_tree.to_dict()), ("is_local", self.is_local), ] ) @classmethod def from_rotation_and_root_translation(cls, skeleton_tree, r, t, is_local=True): """ Construct a skeleton state from rotation and root translation :param skeleton_tree: the skeleton tree :type skeleton_tree: SkeletonTree :param r: rotation (either global or local) :type r: Tensor :param t: root translation :type t: Tensor :param is_local: to indicate that whether the rotation is local or global :type is_local: bool, optional, default=True """ assert ( r.dim() > 0 ), "the rotation needs to have at least 1 dimension (dim = {})".format(r.dim) return cls( SkeletonState._to_state_vector(r, t), skeleton_tree=skeleton_tree, is_local=is_local, ) @classmethod def zero_pose(cls, skeleton_tree): """ Construct a zero-pose skeleton state from the skeleton tree by assuming that all the local rotation is 0 and root translation is 
also 0. :param skeleton_tree: the skeleton tree as the rigid body :type skeleton_tree: SkeletonTree """ return cls.from_rotation_and_root_translation( skeleton_tree=skeleton_tree, r=quat_identity([skeleton_tree.num_joints]), t=torch.zeros(3, dtype=skeleton_tree.local_translation.dtype), is_local=True, ) def local_repr(self): """ Convert the skeleton state into local representation. This will only affects the values of .tensor. If the skeleton state already has `is_local=True`. This method will do nothing. :rtype: SkeletonState """ if self.is_local: return self return SkeletonState.from_rotation_and_root_translation( self.skeleton_tree, r=self.local_rotation, t=self.root_translation, is_local=True, ) def global_repr(self): """ Convert the skeleton state into global representation. This will only affects the values of .tensor. If the skeleton state already has `is_local=False`. This method will do nothing. :rtype: SkeletonState """ if not self.is_local: return self return SkeletonState.from_rotation_and_root_translation( self.skeleton_tree, r=self.global_rotation, t=self.root_translation, is_local=False, ) def _get_pairwise_average_translation(self): global_transform_inv = transform_inverse(self.global_transformation) p1 = global_transform_inv.unsqueeze(-2) p2 = self.global_transformation.unsqueeze(-3) pairwise_translation = ( transform_translation(transform_mul(p1, p2)) .reshape(-1, len(self.skeleton_tree), len(self.skeleton_tree), 3) .mean(axis=0) ) return pairwise_translation def _transfer_to(self, new_skeleton_tree: SkeletonTree): old_indices = list(map(self.skeleton_tree.index, new_skeleton_tree)) return SkeletonState.from_rotation_and_root_translation( new_skeleton_tree, r=self.global_rotation[..., old_indices, :], t=self.root_translation, is_local=False, ) def drop_nodes_by_names( self, node_names: List[str], estimate_local_translation_from_states: bool = True ) -> "SkeletonState": """ Drop a list of nodes from the skeleton and re-compute the local rotation to 
match the original joint position as much as possible. :param node_names: a list node names that specifies the nodes need to be dropped :type node_names: List of strings :param estimate_local_translation_from_states: the boolean indicator that specifies whether\ or not to re-estimate the local translation from the states (avg.) :type estimate_local_translation_from_states: boolean :rtype: SkeletonState """ if estimate_local_translation_from_states: pairwise_translation = self._get_pairwise_average_translation() else: pairwise_translation = None new_skeleton_tree = self.skeleton_tree.drop_nodes_by_names( node_names, pairwise_translation ) return self._transfer_to(new_skeleton_tree) def keep_nodes_by_names( self, node_names: List[str], estimate_local_translation_from_states: bool = True ) -> "SkeletonState": """ Keep a list of nodes and drop all other nodes from the skeleton and re-compute the local rotation to match the original joint position as much as possible. :param node_names: a list node names that specifies the nodes need to be dropped :type node_names: List of strings :param estimate_local_translation_from_states: the boolean indicator that specifies whether\ or not to re-estimate the local translation from the states (avg.) 
:type estimate_local_translation_from_states: boolean :rtype: SkeletonState """ return self.drop_nodes_by_names( list(filter(lambda x: (x not in node_names), self)), estimate_local_translation_from_states, ) def _remapped_to( self, joint_mapping: Dict[str, str], target_skeleton_tree: SkeletonTree ): joint_mapping_inv = {target: source for source, target in joint_mapping.items()} reduced_target_skeleton_tree = target_skeleton_tree.keep_nodes_by_names( list(joint_mapping_inv) ) n_joints = ( len(joint_mapping), len(self.skeleton_tree), len(reduced_target_skeleton_tree), ) assert ( len(set(n_joints)) == 1 ), "the joint mapping is not consistent with the skeleton trees" source_indices = list( map( lambda x: self.skeleton_tree.index(joint_mapping_inv[x]), reduced_target_skeleton_tree, ) ) target_local_rotation = self.local_rotation[..., source_indices, :] return SkeletonState.from_rotation_and_root_translation( skeleton_tree=reduced_target_skeleton_tree, r=target_local_rotation, t=self.root_translation, is_local=True, ) def retarget_to( self, joint_mapping: Dict[str, str], source_tpose_local_rotation, source_tpose_root_translation: np.ndarray, target_skeleton_tree: SkeletonTree, target_tpose_local_rotation, target_tpose_root_translation: np.ndarray, rotation_to_target_skeleton, scale_to_target_skeleton: float, z_up: bool = True, ) -> "SkeletonState": """ Retarget the skeleton state to a target skeleton tree. This is a naive retarget implementation with rough approximations. The function follows the procedures below. Steps: 1. Drop the joints from the source (self) that do not belong to the joint mapping\ with an implementation that is similar to "keep_nodes_by_names()" - take a\ look at the function doc for more details (same for source_tpose) 2. Rotate the source state and the source tpose by "rotation_to_target_skeleton"\ to align the source with the target orientation 3. Extract the root translation and normalize it to match the scale of the target\ skeleton 4. 
            Extract the global rotation from source state relative to source tpose and\
        re-apply the relative rotation to the target tpose to construct the global\
        rotation after retargetting

            5. Combine the computed global rotation and the root translation from 3 and 4 to\
        complete the retargeting.

            6. Make feet on the ground (global translation z)

        :param joint_mapping: a dictionary of that maps the joint node from the source skeleton to \
        the target skeleton
        :type joint_mapping: Dict[str, str]

        :param source_tpose_local_rotation: the local rotation of the source skeleton
        :type source_tpose_local_rotation: Tensor

        :param source_tpose_root_translation: the root translation of the source tpose
        :type source_tpose_root_translation: np.ndarray

        :param target_skeleton_tree: the target skeleton tree
        :type target_skeleton_tree: SkeletonTree

        :param target_tpose_local_rotation: the local rotation of the target skeleton
        :type target_tpose_local_rotation: Tensor

        :param target_tpose_root_translation: the root translation of the target tpose
        :type target_tpose_root_translation: Tensor

        :param rotation_to_target_skeleton: the rotation that needs to be applied to the source\
        skeleton to align with the target skeleton. Essentially the rotation is t_R_s, where t is\
        the frame of reference of the target skeleton and s is the frame of reference of the source\
        skeleton
        :type rotation_to_target_skeleton: Tensor

        :param scale_to_target_skeleton: the factor that needs to be multiplied from source\
        skeleton to target skeleton (unit in distance). For example, to go from `cm` to `m`, the \
        factor needs to be 0.01.
        :type scale_to_target_skeleton: float
        :rtype: SkeletonState
        """
        # STEP 0: Preprocess -- wrap both t-poses as SkeletonState objects so the
        # same transfer/remap machinery can be applied to them.
        source_tpose = SkeletonState.from_rotation_and_root_translation(
            skeleton_tree=self.skeleton_tree,
            r=source_tpose_local_rotation,
            t=source_tpose_root_translation,
            is_local=True,
        )

        target_tpose = SkeletonState.from_rotation_and_root_translation(
            skeleton_tree=target_skeleton_tree,
            r=target_tpose_local_rotation,
            t=target_tpose_root_translation,
            is_local=True,
        )

        # STEP 1: Drop the irrelevant joints -- keep only the joints named in the
        # mapping, then remap state and t-pose onto the target joint ordering.
        pairwise_translation = self._get_pairwise_average_translation()
        node_names = list(joint_mapping)
        new_skeleton_tree = self.skeleton_tree.keep_nodes_by_names(
            node_names, pairwise_translation
        )

        # TODO: combine the following steps before STEP 3
        source_tpose = source_tpose._transfer_to(new_skeleton_tree)
        source_state = self._transfer_to(new_skeleton_tree)

        source_tpose = source_tpose._remapped_to(joint_mapping, target_skeleton_tree)
        source_state = source_state._remapped_to(joint_mapping, target_skeleton_tree)

        # STEP 2: Rotate the source to align with the target -- the alignment
        # rotation is premultiplied onto the root joint only (index 0) and the
        # root translation is rotated accordingly.
        new_local_rotation = source_tpose.local_rotation.clone()
        new_local_rotation[..., 0, :] = quat_mul_norm(
            rotation_to_target_skeleton, source_tpose.local_rotation[..., 0, :]
        )

        source_tpose = SkeletonState.from_rotation_and_root_translation(
            skeleton_tree=source_tpose.skeleton_tree,
            r=new_local_rotation,
            t=quat_rotate(rotation_to_target_skeleton, source_tpose.root_translation),
            is_local=True,
        )

        new_local_rotation = source_state.local_rotation.clone()
        new_local_rotation[..., 0, :] = quat_mul_norm(
            rotation_to_target_skeleton, source_state.local_rotation[..., 0, :]
        )
        source_state = SkeletonState.from_rotation_and_root_translation(
            skeleton_tree=source_state.skeleton_tree,
            r=new_local_rotation,
            t=quat_rotate(rotation_to_target_skeleton, source_state.root_translation),
            is_local=True,
        )

        # STEP 3: Normalize to match the target scale -- only the root-translation
        # *offset* from the t-pose is scaled; the target t-pose supplies the base.
        root_translation_diff = (
            source_state.root_translation - source_tpose.root_translation
        ) * scale_to_target_skeleton
        # STEP 4: the global rotation from source state relative to source tpose and
        # re-apply to the target
        current_skeleton_tree = source_state.skeleton_tree
        # Build the target t-pose global rotations in *source* joint order; joints
        # missing from the target tree keep the source-state rotation as fallback.
        target_tpose_global_rotation = source_state.global_rotation[0, :].clone()
        for current_index, name in enumerate(current_skeleton_tree):
            if name in target_tpose.skeleton_tree:
                target_tpose_global_rotation[
                    current_index, :
                ] = target_tpose.global_rotation[
                    target_tpose.skeleton_tree.index(name), :
                ]

        # Relative rotation of each joint w.r.t. its t-pose, then re-applied to
        # the target t-pose: new = (state * tpose^-1) * target_tpose.
        global_rotation_diff = quat_mul_norm(
            source_state.global_rotation, quat_inverse(source_tpose.global_rotation)
        )
        new_global_rotation = quat_mul_norm(
            global_rotation_diff, target_tpose_global_rotation
        )

        # STEP 5: Putting 3 and 4 together -- expand onto the full target tree;
        # target joints with no mapped source joint inherit the rotation of their
        # closest mapped ancestor (walk up via parent_of until a match).
        current_skeleton_tree = source_state.skeleton_tree
        shape = source_state.global_rotation.shape[:-1]
        shape = shape[:-1] + target_tpose.global_rotation.shape[-2:-1]
        new_global_rotation_output = quat_identity(shape)
        for current_index, name in enumerate(target_skeleton_tree):
            while name not in current_skeleton_tree:
                name = target_skeleton_tree.parent_of(name)
            parent_index = current_skeleton_tree.index(name)
            new_global_rotation_output[:, current_index, :] = new_global_rotation[
                :, parent_index, :
            ]

        source_state = SkeletonState.from_rotation_and_root_translation(
            skeleton_tree=target_skeleton_tree,
            r=new_global_rotation_output,
            t=target_tpose.root_translation + root_translation_diff,
            is_local=False,
        ).local_repr()

        return source_state

    def retarget_to_by_tpose(
        self,
        joint_mapping: Dict[str, str],
        source_tpose: "SkeletonState",
        target_tpose: "SkeletonState",
        rotation_to_target_skeleton,
        scale_to_target_skeleton: float,
    ) -> "SkeletonState":
        """
        Retarget the skeleton state to a target skeleton tree. This is a naive
        retarget implementation with rough approximations.

        See the method `retarget_to()` for the full description of the procedure.

        :param joint_mapping: maps joint names from the source skeleton to the target skeleton
        :type joint_mapping: Dict[str, str]
        :param source_tpose: t-pose of the source skeleton
        :type source_tpose: SkeletonState
        :param target_tpose: t-pose of the target skeleton
        :type target_tpose: SkeletonState
        :param rotation_to_target_skeleton: rotation t_R_s aligning the source frame\
        with the target frame
        :type rotation_to_target_skeleton: Tensor
        :param scale_to_target_skeleton: distance-unit scale factor from source to\
        target (e.g. 0.01 to go from `cm` to `m`)
        :type scale_to_target_skeleton: float
        :rtype: SkeletonState
        """
        assert (
            len(source_tpose.shape) == 0 and len(target_tpose.shape) == 0
        ), "the retargeting script currently doesn't support vectorized operations"
        return self.retarget_to(
            joint_mapping,
            source_tpose.local_rotation,
            source_tpose.root_translation,
            target_tpose.skeleton_tree,
            target_tpose.local_rotation,
            target_tpose.root_translation,
            rotation_to_target_skeleton,
            scale_to_target_skeleton,
        )


class SkeletonMotion(SkeletonState):
    # A SkeletonState sequence augmented with per-joint global linear and angular
    # velocities plus a frame rate; velocities are appended to the flat tensor.

    def __init__(self, tensor_backend, skeleton_tree, is_local, fps, *args, **kwargs):
        self._fps = fps
        super().__init__(tensor_backend, skeleton_tree, is_local, *args, **kwargs)

    def clone(self):
        return SkeletonMotion(
            self.tensor.clone(), self.skeleton_tree, self._is_local, self._fps
        )

    @property
    def invariant_property(self):
        # Everything (besides the tensor itself) needed to reconstruct the motion.
        return {
            "skeleton_tree": self.skeleton_tree,
            "is_local": self.is_local,
            "fps": self.fps,
        }

    @property
    def global_velocity(self):
        """ global velocity """
        # Flat tensor layout: rotations (4*J), root translation (3), then linear
        # velocities (3*J) -- slice out the linear-velocity segment.
        curr_index = self.num_joints * 4 + 3
        return self.tensor[..., curr_index : curr_index + self.num_joints * 3].reshape(
            *(self.tensor.shape[:-1] + (self.num_joints, 3))
        )

    @property
    def global_angular_velocity(self):
        """ global angular velocity """
        # Angular velocities follow the linear ones: offset 4*J + 3 + 3*J = 7*J + 3.
        curr_index = self.num_joints * 7 + 3
        return self.tensor[..., curr_index : curr_index + self.num_joints * 3].reshape(
            *(self.tensor.shape[:-1] + (self.num_joints, 3))
        )

    @property
    def fps(self):
        """ number of frames per second """
        return self._fps

    @property
    def time_delta(self):
        """ time between two adjacent frames """
        return 1.0 / self.fps

    @property
    def global_root_velocity(self):
        """ global root velocity """
        return self.global_velocity[..., 0, :]

    @property
    def global_root_angular_velocity(self):
        """ global root angular velocity """
        return self.global_angular_velocity[..., 0, :]

    @classmethod
    def from_state_vector_and_velocity(
        cls,
        skeleton_tree,
        state_vector,
        global_velocity,
        global_angular_velocity,
        is_local,
        fps,
    ):
        """
        Construct a skeleton motion from a skeleton state vector plus the global
        velocity and angular velocity at each joint.

        :param skeleton_tree: the skeleton tree that the motion is based on
        :param state_vector: the state vector from the skeleton state by `.tensor`
        :param global_velocity: the global velocity at each joint
        :param global_angular_velocity: the global angular velocity at each joint
        :param is_local: whether the rotation in the state vector is in local frame
        :param fps: number of frames per second
        :rtype: SkeletonMotion
        """
        state_shape = state_vector.shape[:-1]
        # Flatten the per-joint velocities and append them to the state vector.
        v = global_velocity.reshape(*(state_shape + (-1,)))
        av = global_angular_velocity.reshape(*(state_shape + (-1,)))
        new_state_vector = torch.cat([state_vector, v, av], axis=-1)
        return cls(
            new_state_vector,
            skeleton_tree=skeleton_tree,
            is_local=is_local,
            fps=fps,
        )

    @classmethod
    def from_skeleton_state(
        cls: Type["SkeletonMotion"], skeleton_state: SkeletonState, fps: int
    ):
        """
        Construct a skeleton motion from a skeleton state. The velocities are
        estimated using second order gaussian filter along the last axis. The
        skeleton state must have at least .dim >= 1

        :param skeleton_state: the skeleton state that the motion is based on
        :type skeleton_state: SkeletonState
        :param fps: number of frames per second
        :type fps: int
        :rtype: SkeletonMotion
        """
        # NOTE(review): exact type check -- a SkeletonMotion (subclass) input is
        # rejected here as well; presumably it already carries velocities.
        assert (
            type(skeleton_state) == SkeletonState
        ), "expected type of {}, got {}".format(SkeletonState, type(skeleton_state))
        global_velocity = SkeletonMotion._compute_velocity(
            p=skeleton_state.global_translation, time_delta=1 / fps
        )
        global_angular_velocity = SkeletonMotion._compute_angular_velocity(
            r=skeleton_state.global_rotation, time_delta=1 / fps
        )
        return cls.from_state_vector_and_velocity(
            skeleton_tree=skeleton_state.skeleton_tree,
            state_vector=skeleton_state.tensor,
            global_velocity=global_velocity,
            global_angular_velocity=global_angular_velocity,
            is_local=skeleton_state.is_local,
            fps=fps,
        )

    @staticmethod
    def _to_state_vector(rot, rt, vel, avel):
        # Serialize rotation + root translation via the parent helper, then
        # append the flattened velocities.
        state_shape = rot.shape[:-2]
        skeleton_state_v = SkeletonState._to_state_vector(rot, rt)
        v = vel.reshape(*(state_shape + (-1,)))
        av = avel.reshape(*(state_shape + (-1,)))
        skeleton_motion_v = torch.cat([skeleton_state_v, v, av], axis=-1)
        return skeleton_motion_v

    @classmethod
    def from_dict(
        cls: Type["SkeletonMotion"], dict_repr: OrderedDict, *args, **kwargs
    ) -> "SkeletonMotion":
        # Inverse of `to_dict`.
        rot = TensorUtils.from_dict(dict_repr["rotation"], *args, **kwargs)
        rt = TensorUtils.from_dict(dict_repr["root_translation"], *args, **kwargs)
        vel = TensorUtils.from_dict(dict_repr["global_velocity"], *args, **kwargs)
        avel = TensorUtils.from_dict(
            dict_repr["global_angular_velocity"], *args, **kwargs
        )
        return cls(
            SkeletonMotion._to_state_vector(rot, rt, vel, avel),
            skeleton_tree=SkeletonTree.from_dict(
                dict_repr["skeleton_tree"], *args, **kwargs
            ),
            is_local=dict_repr["is_local"],
            fps=dict_repr["fps"],
        )

    def to_dict(self) -> OrderedDict:
        # Serializable representation; see `from_dict` for the inverse.
        return OrderedDict(
            [
                ("rotation", tensor_to_dict(self.rotation)),
                ("root_translation", tensor_to_dict(self.root_translation)),
                ("global_velocity", tensor_to_dict(self.global_velocity)),
                ("global_angular_velocity", tensor_to_dict(self.global_angular_velocity)),
                ("skeleton_tree", self.skeleton_tree.to_dict()),
                ("is_local", self.is_local),
                ("fps", self.fps),
            ]
        )

    @classmethod
    def from_fbx(
        cls: Type["SkeletonMotion"],
        fbx_file_path,
        skeleton_tree=None,
        is_local=True,
        fps=120,
        root_joint="",
        root_trans_index=0,
        *args,
        **kwargs,
    ) -> "SkeletonMotion":
        """
        Construct a skeleton motion from a fbx file (TODO - generalize this). If
        the skeleton tree is not given, it will use the first frame of the mocap
        to construct the skeleton tree.

        :param fbx_file_path: the path of the fbx file
        :param skeleton_tree: optional skeleton tree that the rotation is applied to
        :param is_local: whether the state vector uses local or global rotation
        :param fps: FPS of the FBX animation (default 120)
        :param root_joint: name of the root joint ("" = first animated node in the scene)
        :param root_trans_index: index of the joint to extract the root transform from
        :rtype: SkeletonMotion
        """
        joint_names, joint_parents, transforms, fps = fbx_to_array(
            fbx_file_path, root_joint, fps
        )
        # swap the last two axis to match the convention
        local_transform = euclidean_to_transform(
            transformation_matrix=torch.from_numpy(
                np.swapaxes(np.array(transforms), -1, -2),
            ).float()
        )
        local_rotation = transform_rotation(local_transform)
        root_translation = transform_translation(local_transform)[..., root_trans_index, :]
joint_parents = torch.from_numpy(np.array(joint_parents)).int() if skeleton_tree is None: local_translation = transform_translation(local_transform).reshape( -1, len(joint_parents), 3 )[0] skeleton_tree = SkeletonTree(joint_names, joint_parents, local_translation) skeleton_state = SkeletonState.from_rotation_and_root_translation( skeleton_tree, r=local_rotation, t=root_translation, is_local=True ) if not is_local: skeleton_state = skeleton_state.global_repr() return cls.from_skeleton_state( skeleton_state=skeleton_state, fps=fps ) @staticmethod def _compute_velocity(p, time_delta, guassian_filter=True): velocity = torch.from_numpy( filters.gaussian_filter1d( np.gradient(p.numpy(), axis=-3), 2, axis=-3, mode="nearest" ) / time_delta, ) return velocity @staticmethod def _compute_angular_velocity(r, time_delta: float, guassian_filter=True): # assume the second last dimension is the time axis diff_quat_data = quat_identity_like(r) diff_quat_data[..., :-1, :, :] = quat_mul_norm( r[..., 1:, :, :], quat_inverse(r[..., :-1, :, :]) ) diff_angle, diff_axis = quat_angle_axis(diff_quat_data) angular_velocity = diff_axis * diff_angle.unsqueeze(-1) / time_delta angular_velocity = torch.from_numpy( filters.gaussian_filter1d( angular_velocity.numpy(), 2, axis=-3, mode="nearest" ), ) return angular_velocity def crop(self, start: int, end: int, fps: Optional[int] = None): """ Crop the motion along its last axis. This is equivalent to performing a slicing on the object with [..., start: end: skip_every] where skip_every = old_fps / fps. Note that the new fps provided must be a factor of the original fps. 
:param start: the beginning frame index :type start: int :param end: the ending frame index :type end: int :param fps: number of frames per second in the output (if not given the original fps will be used) :type fps: int, optional :rtype: SkeletonMotion """ if fps is None: new_fps = int(self.fps) old_fps = int(self.fps) else: new_fps = int(fps) old_fps = int(self.fps) assert old_fps % fps == 0, ( "the resampling doesn't support fps with non-integer division " "from the original fps: {} => {}".format(old_fps, fps) ) skip_every = old_fps // new_fps return SkeletonMotion.from_skeleton_state( SkeletonState.from_rotation_and_root_translation( skeleton_tree=self.skeleton_tree, t=self.root_translation[start:end:skip_every], r=self.local_rotation[start:end:skip_every], is_local=True ), fps=self.fps ) def retarget_to( self, joint_mapping: Dict[str, str], source_tpose_local_rotation, source_tpose_root_translation: np.ndarray, target_skeleton_tree: "SkeletonTree", target_tpose_local_rotation, target_tpose_root_translation: np.ndarray, rotation_to_target_skeleton, scale_to_target_skeleton: float, z_up: bool = True, ) -> "SkeletonMotion": """ Same as the one in :class:`SkeletonState`. This method discards all velocity information before retargeting and re-estimate the velocity after the retargeting. The same fps is used in the new retargetted motion. 
:param joint_mapping: a dictionary of that maps the joint node from the source skeleton to \ the target skeleton :type joint_mapping: Dict[str, str] :param source_tpose_local_rotation: the local rotation of the source skeleton :type source_tpose_local_rotation: Tensor :param source_tpose_root_translation: the root translation of the source tpose :type source_tpose_root_translation: np.ndarray :param target_skeleton_tree: the target skeleton tree :type target_skeleton_tree: SkeletonTree :param target_tpose_local_rotation: the local rotation of the target skeleton :type target_tpose_local_rotation: Tensor :param target_tpose_root_translation: the root translation of the target tpose :type target_tpose_root_translation: Tensor :param rotation_to_target_skeleton: the rotation that needs to be applied to the source\ skeleton to align with the target skeleton. Essentially the rotation is t_R_s, where t is\ the frame of reference of the target skeleton and s is the frame of reference of the source\ skeleton :type rotation_to_target_skeleton: Tensor :param scale_to_target_skeleton: the factor that needs to be multiplied from source\ skeleton to target skeleton (unit in distance). For example, to go from `cm` to `m`, the \ factor needs to be 0.01. :type scale_to_target_skeleton: float :rtype: SkeletonMotion """ return SkeletonMotion.from_skeleton_state( super().retarget_to( joint_mapping, source_tpose_local_rotation, source_tpose_root_translation, target_skeleton_tree, target_tpose_local_rotation, target_tpose_root_translation, rotation_to_target_skeleton, scale_to_target_skeleton, z_up, ), self.fps, ) def retarget_to_by_tpose( self, joint_mapping: Dict[str, str], source_tpose: "SkeletonState", target_tpose: "SkeletonState", rotation_to_target_skeleton, scale_to_target_skeleton: float, z_up: bool = True, ) -> "SkeletonMotion": """ Same as the one in :class:`SkeletonState`. 
This method discards all velocity information before retargeting and re-estimate the velocity after the retargeting. The same fps is used in the new retargetted motion. :param joint_mapping: a dictionary of that maps the joint node from the source skeleton to \ the target skeleton :type joint_mapping: Dict[str, str] :param source_tpose: t-pose of the source skeleton :type source_tpose: SkeletonState :param target_tpose: t-pose of the target skeleton :type target_tpose: SkeletonState :param rotation_to_target_skeleton: the rotation that needs to be applied to the source\ skeleton to align with the target skeleton. Essentially the rotation is t_R_s, where t is\ the frame of reference of the target skeleton and s is the frame of reference of the source\ skeleton :type rotation_to_target_skeleton: Tensor :param scale_to_target_skeleton: the factor that needs to be multiplied from source\ skeleton to target skeleton (unit in distance). For example, to go from `cm` to `m`, the \ factor needs to be 0.01. :type scale_to_target_skeleton: float :rtype: SkeletonMotion """ return self.retarget_to( joint_mapping, source_tpose.local_rotation, source_tpose.root_translation, target_tpose.skeleton_tree, target_tpose.local_rotation, target_tpose.root_translation, rotation_to_target_skeleton, scale_to_target_skeleton, z_up, ) ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/__init__.py ================================================ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. 
================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/common.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import os

from ..core import logger
from .plt_plotter import Matplotlib3DPlotter
from .skeleton_plotter_tasks import Draw3DSkeletonMotion, Draw3DSkeletonState


def plot_skeleton_state(skeleton_state, task_name=""):
    """
    Visualize a skeleton state

    :param skeleton_state:
    :param task_name:
    :type skeleton_state: SkeletonState
    :type task_name: string, optional
    """
    logger.info("plotting {}".format(task_name))
    task = Draw3DSkeletonState(task_name=task_name, skeleton_state=skeleton_state)
    plotter = Matplotlib3DPlotter(task)
    plotter.show()


def plot_skeleton_states(skeleton_state, skip_n=1, task_name=""):
    """
    Visualize a sequence of skeleton state. The dimension of the skeleton state
    must be 1.

    :param skeleton_state: one-dimensional (time-indexed) batch of states
    :param skip_n: render every `skip_n`-th frame
    :param task_name:
    :type skeleton_state: SkeletonState
    :type task_name: string, optional
    """
    logger.info("plotting {} motion".format(task_name))
    assert len(skeleton_state.shape) == 1, "the state must have only one dimension"
    task = Draw3DSkeletonState(task_name=task_name, skeleton_state=skeleton_state[0])
    plotter = Matplotlib3DPlotter(task)
    for frame_id in range(skeleton_state.shape[0]):
        if frame_id % skip_n != 0:
            continue
        task.update(skeleton_state[frame_id])
        plotter.update()
    plotter.show()


def plot_skeleton_motion(skeleton_motion, skip_n=1, task_name=""):
    """
    Visualize a skeleton motion along its first dimension.

    :param skeleton_motion:
    :param skip_n: render every `skip_n`-th frame
    :param task_name:
    :type skeleton_motion: SkeletonMotion
    :type task_name: string, optional
    """
    logger.info("plotting {} motion".format(task_name))
    task = Draw3DSkeletonMotion(
        task_name=task_name, skeleton_motion=skeleton_motion, frame_index=0
    )
    plotter = Matplotlib3DPlotter(task)
    for frame_id in range(len(skeleton_motion)):
        if frame_id % skip_n != 0:
            continue
        task.update(frame_id)
        plotter.update()
    plotter.show()


def plot_skeleton_motion_interactive_base(skeleton_motion, task_name=""):
    # Generator driving the interactive viewer; yields the live PlotParams once
    # per rendered frame so callers can inspect/synchronize playback state.

    class PlotParams:
        def __init__(self, total_num_frames):
            self.current_frame = 0
            self.playing = False
            self.looping = False
            self.confirmed = False
            self.playback_speed = 4
            self.total_num_frames = total_num_frames

        def sync(self, other):
            self.current_frame = other.current_frame
            self.playing = other.playing
            # BUGFIX: was `self.looping = other.current_frame`, which clobbered
            # the boolean looping flag with a frame index.
            self.looping = other.looping
            self.confirmed = other.confirmed
            self.playback_speed = other.playback_speed
            self.total_num_frames = other.total_num_frames

    task = Draw3DSkeletonMotion(
        task_name=task_name, skeleton_motion=skeleton_motion, frame_index=0
    )
    plotter = Matplotlib3DPlotter(task)

    plot_params = PlotParams(total_num_frames=len(skeleton_motion))
    print("Entered interactive plot - press 'n' to quit, 'h' for a list of commands")

    def press(event):
        if event.key == "x":
            plot_params.playing = not plot_params.playing
        elif event.key == "z":
            plot_params.current_frame = plot_params.current_frame - 1
        elif event.key == "c":
            plot_params.current_frame = plot_params.current_frame + 1
        elif event.key == "a":
            plot_params.current_frame = plot_params.current_frame - 20
        elif event.key == "d":
            plot_params.current_frame = plot_params.current_frame + 20
        elif event.key == "w":
            plot_params.looping = not plot_params.looping
            print("Looping: {}".format(plot_params.looping))
        elif event.key == "v":
            plot_params.playback_speed *= 2
            print("playback speed: {}".format(plot_params.playback_speed))
        elif event.key == "b":
            if plot_params.playback_speed != 1:
                plot_params.playback_speed //= 2
                print("playback speed: {}".format(plot_params.playback_speed))
        elif event.key == "n":
            plot_params.confirmed = True
        elif event.key == "h":
            rows, columns = os.popen("stty size", "r").read().split()
            columns = int(columns)
            print("=" * columns)
            print("x: play/pause")
            print("z: previous frame")
            print("c: next frame")
            # BUGFIX: help said "10 frames" while 'a'/'d' actually jump 20.
            print("a: jump 20 frames back")
            print("d: jump 20 frames forward")
            print("w: looping/non-looping")
            print("v: double speed (this can be applied multiple times)")
            print("b: half speed (this can be applied multiple times)")
            print("n: quit")
            print("h: help")
            print("=" * columns)

        print(
            'current frame index: {}/{} (press "n" to quit)'.format(
                plot_params.current_frame, plot_params.total_num_frames - 1
            )
        )

    plotter.fig.canvas.mpl_connect("key_press_event", press)
    while True:
        reset_trail = False
        if plot_params.confirmed:
            break
        if plot_params.playing:
            plot_params.current_frame += plot_params.playback_speed
        # Clamp or wrap the frame index depending on the looping flag; wrapping
        # resets the motion trail so it does not smear across the jump.
        if plot_params.current_frame >= plot_params.total_num_frames:
            if plot_params.looping:
                plot_params.current_frame %= plot_params.total_num_frames
                reset_trail = True
            else:
                plot_params.current_frame = plot_params.total_num_frames - 1
        if plot_params.current_frame < 0:
            if plot_params.looping:
                plot_params.current_frame %= plot_params.total_num_frames
                reset_trail = True
            else:
                plot_params.current_frame = 0
        yield plot_params
        task.update(plot_params.current_frame, reset_trail)
        plotter.update()


def plot_skeleton_motion_interactive(skeleton_motion, task_name=""):
    """
    Visualize a skeleton motion along its first dimension interactively.

    :param skeleton_motion:
    :param task_name:
    :type skeleton_motion: SkeletonMotion
    :type task_name: string, optional
    """
    for _ in plot_skeleton_motion_interactive_base(skeleton_motion, task_name):
        pass


def plot_skeleton_motion_interactive_multiple(*callables, sync=True):
    # Drive several interactive-plot generators in lockstep; with `sync` on, the
    # playback state of each window is copied from its left neighbour.
    for _ in zip(*callables):
        if sync:
            for p1, p2 in zip(_[:-1], _[1:]):
                p2.sync(p1)


# def plot_skeleton_motion_interactive_multiple_same(skeleton_motions, task_name=""):


# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/core.py
# ================================================

# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
The base abstract classes for plotter and the plotting tasks. It describes how the
plotter deals with the tasks in the general cases
"""
from typing import List


class BasePlotterTask(object):
    """A named, typed unit of plotting work.

    The type tag identifies which plotter callable should create/update the
    artists for this task; the name is the unique key for caching them.
    """

    _task_name: str  # unique name of the task
    _task_type: str  # type of the task is used to identify which callable

    def __init__(self, task_name: str, task_type: str) -> None:
        self._task_name = task_name
        self._task_type = task_type

    @property
    def task_name(self):
        return self._task_name

    @property
    def task_type(self):
        return self._task_type

    def get_scoped_name(self, name):
        # Child names are namespaced under this task's own name.
        return "/".join((self._task_name, name))

    def __iter__(self):
        """Subclasses must yield the primitive tasks that make up this task."""
        raise NotImplementedError


class BasePlotterTasks(object):
    """A flat iterable view over the primitives of several tasks."""

    def __init__(self, tasks) -> None:
        self._tasks = tasks

    def __iter__(self):
        for each in self._tasks:
            yield from each


class BasePlotter(object):
    """An abstract plotter which deals with a plotting task. The children class
    needs to implement the functions to create/update the objects according to
    the task given.
    """

    _task_primitives: List[BasePlotterTask]

    def __init__(self, task: BasePlotterTask) -> None:
        self._task_primitives = []
        self.create(task)

    @property
    def task_primitives(self):
        return self._task_primitives

    def create(self, task: BasePlotterTask) -> None:
        """Expand `task` into primitives, remember them, and draw them."""
        primitives = [primitive for primitive in task]
        self._task_primitives.extend(primitives)
        self._create_impl(primitives)

    def update(self) -> None:
        """Redraw every primitive registered so far."""
        self._update_impl(self._task_primitives)

    def _update_impl(self, task_list: List[BasePlotterTask]) -> None:
        raise NotImplementedError

    def _create_impl(self, task_list: List[BasePlotterTask]) -> None:
        raise NotImplementedError


# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/plt_plotter.py
# ================================================

# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
The matplotlib plotter implementation for all the primitive tasks (in our case: lines and
dots)
"""
from typing import Any, Callable, Dict, List

import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as p3
import numpy as np

from .core import BasePlotter, BasePlotterTask


class Matplotlib2DPlotter(BasePlotter):
    """Matplotlib backend for 2D plotter tasks.

    Dispatches on each task primitive's ``task_type`` string ("Draw2DLines",
    "Draw2DDots", "Draw2DTrail") to a create/update callable, and caches the
    matplotlib artists per task name so updates mutate artists in place
    instead of re-plotting.
    """

    _fig: plt.figure  # plt figure
    _ax: plt.axis  # plt axis
    # stores artist objects for each task (task name as the key)
    _artist_cache: Dict[str, Any]
    # callables for each task primitives
    _create_impl_callables: Dict[str, Callable]
    _update_impl_callables: Dict[str, Callable]

    def __init__(self, task: "BasePlotterTask") -> None:
        fig, ax = plt.subplots()
        self._fig = fig
        self._ax = ax
        self._artist_cache = {}
        self._create_impl_callables = {
            "Draw2DLines": self._lines_create_impl,
            "Draw2DDots": self._dots_create_impl,
            "Draw2DTrail": self._trail_create_impl,
        }
        self._update_impl_callables = {
            "Draw2DLines": self._lines_update_impl,
            "Draw2DDots": self._dots_update_impl,
            "Draw2DTrail": self._trail_update_impl,
        }
        self._init_lim()
        # BasePlotter.__init__ calls self.create(task), which in turn calls
        # _create_impl -> the callables above, so the tables must exist first.
        super().__init__(task)

    @property
    def ax(self):
        return self._ax

    @property
    def fig(self):
        return self._fig

    def show(self):
        plt.show()

    def _min(self, x, y):
        # None-tolerant min: treats None as "no value yet".
        if x is None:
            return y
        if y is None:
            return x
        return min(x, y)

    def _max(self, x, y):
        # None-tolerant max: treats None as "no value yet".
        if x is None:
            return y
        if y is None:
            return x
        return max(x, y)

    def _init_lim(self):
        # Reset the running bounding box accumulated by _update_lim.
        self._curr_x_min = None
        self._curr_y_min = None
        self._curr_x_max = None
        self._curr_y_max = None

    def _update_lim(self, xs, ys):
        # Grow the running bounding box to cover the given coordinates.
        self._curr_x_min = self._min(np.min(xs), self._curr_x_min)
        self._curr_y_min = self._min(np.min(ys), self._curr_y_min)
        self._curr_x_max = self._max(np.max(xs), self._curr_x_max)
        self._curr_y_max = self._max(np.max(ys), self._curr_y_max)

    def _set_lim(self):
        # Apply the accumulated bounding box to the axes (only when every
        # bound has been observed), then reset the accumulator for the next
        # frame.
        if not (
            self._curr_x_min is None
            or self._curr_x_max is None
            or self._curr_y_min is None
            or self._curr_y_max is None
        ):
            self._ax.set_xlim(self._curr_x_min, self._curr_x_max)
            self._ax.set_ylim(self._curr_y_min, self._curr_y_max)
            self._init_lim()

    @staticmethod
    def _lines_extract_xy_impl(index, lines_task):
        # lines_task indexes as [line, endpoint, coord]; returns (xs, ys) of
        # the two endpoints of line `index`.
        return lines_task[index, :, 0], lines_task[index, :, 1]

    @staticmethod
    def _trail_extract_xy_impl(index, trail_task):
        # Consecutive-point pair (index, index+1) of the trail as (xs, ys).
        return (trail_task[index : index + 2, 0], trail_task[index : index + 2, 1])

    def _lines_create_impl(self, lines_task):
        color = lines_task.color
        # One Line2D artist per line segment, cached under the task name.
        self._artist_cache[lines_task.task_name] = [
            self._ax.plot(
                *Matplotlib2DPlotter._lines_extract_xy_impl(i, lines_task),
                color=color,
                linewidth=lines_task.line_width,
                alpha=lines_task.alpha
            )[0]
            for i in range(len(lines_task))
        ]

    def _lines_update_impl(self, lines_task):
        lines_artists = self._artist_cache[lines_task.task_name]
        for i in range(len(lines_task)):
            artist = lines_artists[i]
            xs, ys = Matplotlib2DPlotter._lines_extract_xy_impl(i, lines_task)
            artist.set_data(xs, ys)
            if lines_task.influence_lim:
                self._update_lim(xs, ys)

    def _dots_create_impl(self, dots_task):
        color = dots_task.color
        # All dots share a single marker-only Line2D artist.
        self._artist_cache[dots_task.task_name] = self._ax.plot(
            dots_task[:, 0],
            dots_task[:, 1],
            c=color,
            linestyle="",
            marker=".",
            markersize=dots_task.marker_size,
            alpha=dots_task.alpha,
        )[0]

    def _dots_update_impl(self, dots_task):
        dots_artist = self._artist_cache[dots_task.task_name]
        dots_artist.set_data(dots_task[:, 0], dots_task[:, 1])
        if dots_task.influence_lim:
            self._update_lim(dots_task[:, 0], dots_task[:, 1])

    def _trail_create_impl(self, trail_task):
        # NOTE(review): local `color` is unused; the artists read
        # trail_task.color directly.
        color = trail_task.color
        trail_length = len(trail_task) - 1
        # One segment per consecutive point pair, fading out with age
        # (alpha reaches 0 for the oldest segment).
        # NOTE(review): divides by (trail_length - 1) — a trail of exactly
        # 2 points would divide by zero; verify callers always pass longer
        # trails.
        self._artist_cache[trail_task.task_name] = [
            self._ax.plot(
                *Matplotlib2DPlotter._trail_extract_xy_impl(i, trail_task),
                color=trail_task.color,
                linewidth=trail_task.line_width,
                alpha=trail_task.alpha * (1.0 - i / (trail_length - 1))
            )[0]
            for i in range(trail_length)
        ]

    def _trail_update_impl(self, trail_task):
        trails_artists = self._artist_cache[trail_task.task_name]
        for i in range(len(trail_task) - 1):
            artist = trails_artists[i]
            xs, ys = Matplotlib2DPlotter._trail_extract_xy_impl(i, trail_task)
            artist.set_data(xs, ys)
            if trail_task.influence_lim:
                self._update_lim(xs, ys)

    def _create_impl(self, task_list):
        for task in task_list:
            self._create_impl_callables[task.task_type](task)
        self._draw()

    def _update_impl(self, task_list):
        for task in task_list:
            self._update_impl_callables[task.task_type](task)
        self._draw()

    def _set_aspect_equal_2d(self, zero_centered=True):
        # Force a square view box; when zero_centered, the box is centered
        # at the origin instead of the current data mean.
        xlim = self._ax.get_xlim()
        ylim = self._ax.get_ylim()
        if not zero_centered:
            xmean = np.mean(xlim)
            ymean = np.mean(ylim)
        else:
            xmean = 0
            ymean = 0
        plot_radius = max(
            [
                abs(lim - mean_)
                for lims, mean_ in ((xlim, xmean), (ylim, ymean))
                for lim in lims
            ]
        )
        self._ax.set_xlim([xmean - plot_radius, xmean + plot_radius])
        self._ax.set_ylim([ymean - plot_radius, ymean + plot_radius])

    def _draw(self):
        self._set_lim()
        self._set_aspect_equal_2d()
        self._fig.canvas.draw()
        self._fig.canvas.flush_events()
        # Tiny pause lets the GUI event loop process the redraw.
        plt.pause(0.00001)


class Matplotlib3DPlotter(BasePlotter):
    """Matplotlib backend for 3D plotter tasks.

    Same dispatch scheme as Matplotlib2DPlotter, but for "Draw3DLines",
    "Draw3DDots" and "Draw3DTrail" primitives on an Axes3D.
    """

    _fig: plt.figure  # plt figure
    _ax: p3.Axes3D  # plt 3d axis
    # stores artist objects for each task (task name as the key)
    _artist_cache: Dict[str, Any]
    # callables for each task primitives
    _create_impl_callables: Dict[str, Callable]
    _update_impl_callables: Dict[str, Callable]

    def __init__(self, task: "BasePlotterTask") -> None:
        self._fig = plt.figure()
        # NOTE(review): direct Axes3D(fig) construction is deprecated in
        # newer matplotlib (3.4+); fig.add_subplot(projection="3d") is the
        # modern equivalent — confirm against the pinned matplotlib version.
        self._ax = p3.Axes3D(self._fig)
        self._artist_cache = {}
        self._create_impl_callables = {
            "Draw3DLines": self._lines_create_impl,
            "Draw3DDots": self._dots_create_impl,
            "Draw3DTrail": self._trail_create_impl,
        }
        self._update_impl_callables = {
            "Draw3DLines": self._lines_update_impl,
            "Draw3DDots": self._dots_update_impl,
            "Draw3DTrail": self._trail_update_impl,
        }
        self._init_lim()
        # Must come last: BasePlotter.__init__ triggers create() which uses
        # the dispatch tables above.
        super().__init__(task)

    @property
    def ax(self):
        return self._ax

    @property
    def fig(self):
        return self._fig

    def show(self):
        plt.show()

    def _min(self, x, y):
        # None-tolerant min: treats None as "no value yet".
        if x is None:
            return y
        if y is None:
            return x
        return min(x, y)

    def _max(self, x, y):
        # None-tolerant max: treats None as "no value yet".
        if x is None:
            return y
        if y is None:
            return x
        return max(x, y)

    def _init_lim(self):
        # Reset the running 3D bounding box accumulated by _update_lim.
        self._curr_x_min = None
        self._curr_y_min = None
        self._curr_z_min = None
        self._curr_x_max = None
        self._curr_y_max = None
        self._curr_z_max = None

    def _update_lim(self, xs, ys, zs):
        # Grow the running 3D bounding box to cover the given coordinates.
        self._curr_x_min = self._min(np.min(xs), self._curr_x_min)
        self._curr_y_min = self._min(np.min(ys), self._curr_y_min)
        self._curr_z_min = self._min(np.min(zs), self._curr_z_min)
        self._curr_x_max = self._max(np.max(xs), self._curr_x_max)
        self._curr_y_max = self._max(np.max(ys), self._curr_y_max)
        self._curr_z_max = self._max(np.max(zs), self._curr_z_max)

    def _set_lim(self):
        # Apply the accumulated box (only when complete), then reset it.
        if not (
            self._curr_x_min is None
            or self._curr_x_max is None
            or self._curr_y_min is None
            or self._curr_y_max is None
            or self._curr_z_min is None
            or self._curr_z_max is None
        ):
            self._ax.set_xlim3d(self._curr_x_min, self._curr_x_max)
            self._ax.set_ylim3d(self._curr_y_min, self._curr_y_max)
            self._ax.set_zlim3d(self._curr_z_min, self._curr_z_max)
            self._init_lim()

    @staticmethod
    def _lines_extract_xyz_impl(index, lines_task):
        # lines_task indexes as [line, endpoint, coord]; returns
        # (xs, ys, zs) of the two endpoints of line `index`.
        return lines_task[index, :, 0], lines_task[index, :, 1], lines_task[index, :, 2]

    @staticmethod
    def _trail_extract_xyz_impl(index, trail_task):
        # Consecutive-point pair (index, index+1) of the trail.
        return (
            trail_task[index : index + 2, 0],
            trail_task[index : index + 2, 1],
            trail_task[index : index + 2, 2],
        )

    def _lines_create_impl(self, lines_task):
        color = lines_task.color
        # One Line3D artist per line segment, cached under the task name.
        self._artist_cache[lines_task.task_name] = [
            self._ax.plot(
                *Matplotlib3DPlotter._lines_extract_xyz_impl(i, lines_task),
                color=color,
                linewidth=lines_task.line_width,
                alpha=lines_task.alpha
            )[0]
            for i in range(len(lines_task))
        ]

    def _lines_update_impl(self, lines_task):
        lines_artists = self._artist_cache[lines_task.task_name]
        for i in range(len(lines_task)):
            artist = lines_artists[i]
            xs, ys, zs = Matplotlib3DPlotter._lines_extract_xyz_impl(i, lines_task)
            # Line3D has no 3D set_data; x/y go through set_data and z
            # through set_3d_properties.
            artist.set_data(xs, ys)
            artist.set_3d_properties(zs)
            if lines_task.influence_lim:
                self._update_lim(xs, ys, zs)

    def _dots_create_impl(self, dots_task):
        color = dots_task.color
        # All dots share a single marker-only artist.
        self._artist_cache[dots_task.task_name] = self._ax.plot(
            dots_task[:, 0],
            dots_task[:, 1],
            dots_task[:, 2],
            c=color,
            linestyle="",
            marker=".",
            markersize=dots_task.marker_size,
            alpha=dots_task.alpha,
        )[0]

    def _dots_update_impl(self, dots_task):
        dots_artist = self._artist_cache[dots_task.task_name]
        dots_artist.set_data(dots_task[:, 0], dots_task[:, 1])
        dots_artist.set_3d_properties(dots_task[:, 2])
        if dots_task.influence_lim:
            self._update_lim(dots_task[:, 0], dots_task[:, 1], dots_task[:, 2])

    def _trail_create_impl(self, trail_task):
        # NOTE(review): local `color` is unused; the artists read
        # trail_task.color directly.
        color = trail_task.color
        trail_length = len(trail_task) - 1
        # One segment per consecutive point pair, fading out with age.
        # NOTE(review): divides by (trail_length - 1) — a 2-point trail
        # would divide by zero; verify callers always pass longer trails.
        self._artist_cache[trail_task.task_name] = [
            self._ax.plot(
                *Matplotlib3DPlotter._trail_extract_xyz_impl(i, trail_task),
                color=trail_task.color,
                linewidth=trail_task.line_width,
                alpha=trail_task.alpha * (1.0 - i / (trail_length - 1))
            )[0]
            for i in range(trail_length)
        ]

    def _trail_update_impl(self, trail_task):
        trails_artists = self._artist_cache[trail_task.task_name]
        for i in range(len(trail_task) - 1):
            artist = trails_artists[i]
            xs, ys, zs = Matplotlib3DPlotter._trail_extract_xyz_impl(i, trail_task)
            artist.set_data(xs, ys)
            artist.set_3d_properties(zs)
            if trail_task.influence_lim:
                self._update_lim(xs, ys, zs)

    def _create_impl(self, task_list):
        for task in task_list:
            self._create_impl_callables[task.task_type](task)
        self._draw()

    def _update_impl(self, task_list):
        for task in task_list:
            self._update_impl_callables[task.task_type](task)
        self._draw()

    def _set_aspect_equal_3d(self):
        # Force a cubic view box centered at the current data mean so the
        # three axes share one scale (Axes3D has no native equal-aspect).
        xlim = self._ax.get_xlim3d()
        ylim = self._ax.get_ylim3d()
        zlim = self._ax.get_zlim3d()
        xmean = np.mean(xlim)
        ymean = np.mean(ylim)
        zmean = np.mean(zlim)
        plot_radius = max(
            [
                abs(lim - mean_)
                for lims, mean_ in ((xlim, xmean), (ylim, ymean), (zlim, zmean))
                for lim in lims
            ]
        )
        self._ax.set_xlim3d([xmean - plot_radius, xmean + plot_radius])
        self._ax.set_ylim3d([ymean - plot_radius, ymean + plot_radius])
        self._ax.set_zlim3d([zmean - plot_radius, zmean + plot_radius])

    def _draw(self):
        self._set_lim()
        self._set_aspect_equal_3d()
        self._fig.canvas.draw()
        self._fig.canvas.flush_events()
        # Tiny pause lets the GUI event loop process the redraw.
        plt.pause(0.00001)


================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/simple_plotter_tasks.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
This is where all the task primitives are defined
"""
import numpy as np

from .core import BasePlotterTask


class DrawXDLines(BasePlotterTask):
    """Leaf plotter task holding a batch of line segments.

    Data shape is (num_lines, 2, dim); subclasses fix ``dim`` (2 or 3).
    The task_type reported to the plotter is the concrete subclass name
    (e.g. "Draw2DLines"), which is what the plotter dispatch tables key on.
    """

    _lines: np.ndarray  # (num_lines, 2, dim) segment endpoints
    _color: str  # matplotlib color spec
    _line_width: int
    _alpha: float
    _influence_lim: bool  # whether these lines grow the plot's bounding box

    def __init__(
        self,
        task_name: str,
        lines: np.ndarray,
        color: str = "blue",
        line_width: int = 2,
        alpha: float = 1.0,
        influence_lim: bool = True,
    ) -> None:
        super().__init__(task_name=task_name, task_type=self.__class__.__name__)
        self._color = color
        self._line_width = line_width
        self._alpha = alpha
        self._influence_lim = influence_lim
        self.update(lines)

    @property
    def influence_lim(self) -> bool:
        return self._influence_lim

    @property
    def raw_data(self):
        return self._lines

    @property
    def color(self):
        return self._color

    @property
    def line_width(self):
        return self._line_width

    @property
    def alpha(self):
        return self._alpha

    @property
    def dim(self):
        # Subclasses return 2 or 3.
        raise NotImplementedError

    @property
    def name(self):
        return "{}DLines".format(self.dim)

    def update(self, lines):
        """Replace the stored segments; validates the (N, 2, dim) shape."""
        self._lines = np.array(lines)
        shape = self._lines.shape
        assert shape[-1] == self.dim and shape[-2] == 2 and len(shape) == 3

    def __getitem__(self, index):
        return self._lines[index]

    def __len__(self):
        return self._lines.shape[0]

    def __iter__(self):
        # Leaf task: iterating yields only itself (composite tasks yield
        # all their leaves).
        yield self


class DrawXDDots(BasePlotterTask):
    """Leaf plotter task holding a batch of points.

    Data shape is (num_dots, dim); subclasses fix ``dim`` (2 or 3).
    """

    _dots: np.ndarray  # (num_dots, dim) point coordinates
    _color: str  # matplotlib color spec
    _marker_size: int
    _alpha: float
    _influence_lim: bool  # whether these dots grow the plot's bounding box

    def __init__(
        self,
        task_name: str,
        dots: np.ndarray,
        color: str = "blue",
        marker_size: int = 10,
        alpha: float = 1.0,
        influence_lim: bool = True,
    ) -> None:
        super().__init__(task_name=task_name, task_type=self.__class__.__name__)
        self._color = color
        self._marker_size = marker_size
        self._alpha = alpha
        self._influence_lim = influence_lim
        self.update(dots)

    def update(self, dots):
        """Replace the stored points; validates the (N, dim) shape."""
        self._dots = np.array(dots)
        shape = self._dots.shape
        assert shape[-1] == self.dim and len(shape) == 2

    def __getitem__(self, index):
        return self._dots[index]

    def __len__(self):
        return self._dots.shape[0]

    def __iter__(self):
        # Leaf task: iterating yields only itself.
        yield self

    @property
    def influence_lim(self) -> bool:
        return self._influence_lim

    @property
    def raw_data(self):
        return self._dots

    @property
    def color(self):
        return self._color

    @property
    def marker_size(self):
        return self._marker_size

    @property
    def alpha(self):
        return self._alpha

    @property
    def dim(self):
        # Subclasses return 2 or 3.
        raise NotImplementedError

    @property
    def name(self):
        return "{}DDots".format(self.dim)


class DrawXDTrail(DrawXDDots):
    """Dots task rendered as a fading trail; points are ordered newest first."""

    @property
    def line_width(self):
        # Trails reuse the marker size as the segment line width.
        return self.marker_size

    @property
    def name(self):
        return "{}DTrail".format(self.dim)


class Draw2DLines(DrawXDLines):
    @property
    def dim(self):
        return 2


class Draw3DLines(DrawXDLines):
    @property
    def dim(self):
        return 3


class Draw2DDots(DrawXDDots):
    @property
    def dim(self):
        return 2


class Draw3DDots(DrawXDDots):
    @property
    def dim(self):
        return 3


class Draw2DTrail(DrawXDTrail):
    @property
    def dim(self):
        return 2


class Draw3DTrail(DrawXDTrail):
    @property
    def dim(self):
        return 3


================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/skeleton_plotter_tasks.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2.
Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
""" This is where all skeleton related complex tasks are defined (skeleton state and skeleton motion) """ import numpy as np from .core import BasePlotterTask from .simple_plotter_tasks import Draw3DDots, Draw3DLines, Draw3DTrail class Draw3DSkeletonState(BasePlotterTask): _lines_task: Draw3DLines # sub-task for drawing lines _dots_task: Draw3DDots # sub-task for drawing dots def __init__( self, task_name: str, skeleton_state, joints_color: str = "red", lines_color: str = "blue", alpha=1.0, ) -> None: super().__init__(task_name=task_name, task_type="3DSkeletonState") lines, dots = Draw3DSkeletonState._get_lines_and_dots(skeleton_state) self._lines_task = Draw3DLines( self.get_scoped_name("bodies"), lines, joints_color, alpha=alpha ) self._dots_task = Draw3DDots( self.get_scoped_name("joints"), dots, lines_color, alpha=alpha ) @property def name(self): return "3DSkeleton" def update(self, skeleton_state) -> None: self._update(*Draw3DSkeletonState._get_lines_and_dots(skeleton_state)) @staticmethod def _get_lines_and_dots(skeleton_state): """Get all the lines and dots needed to draw the skeleton state """ assert ( len(skeleton_state.tensor.shape) == 1 ), "the state has to be zero dimensional" dots = skeleton_state.global_translation.numpy() skeleton_tree = skeleton_state.skeleton_tree parent_indices = skeleton_tree.parent_indices.numpy() lines = [] for node_index in range(len(skeleton_tree)): parent_index = parent_indices[node_index] if parent_index != -1: lines.append([dots[node_index], dots[parent_index]]) lines = np.array(lines) return lines, dots def _update(self, lines, dots) -> None: self._lines_task.update(lines) self._dots_task.update(dots) def __iter__(self): yield from self._lines_task yield from self._dots_task class Draw3DSkeletonMotion(BasePlotterTask): def __init__( self, task_name: str, skeleton_motion, frame_index=None, joints_color="red", lines_color="blue", velocity_color="green", angular_velocity_color="purple", trail_color="black", trail_length=10, 
alpha=1.0, ) -> None: super().__init__(task_name=task_name, task_type="3DSkeletonMotion") self._trail_length = trail_length self._skeleton_motion = skeleton_motion # if frame_index is None: curr_skeleton_motion = self._skeleton_motion.clone() if frame_index is not None: curr_skeleton_motion.tensor = self._skeleton_motion.tensor[frame_index, :] # else: # curr_skeleton_motion = self._skeleton_motion[frame_index, :] self._skeleton_state_task = Draw3DSkeletonState( self.get_scoped_name("skeleton_state"), curr_skeleton_motion, joints_color=joints_color, lines_color=lines_color, alpha=alpha, ) vel_lines, avel_lines = Draw3DSkeletonMotion._get_vel_and_avel( curr_skeleton_motion ) self._com_pos = curr_skeleton_motion.root_translation.numpy()[ np.newaxis, ... ].repeat(trail_length, axis=0) self._vel_task = Draw3DLines( self.get_scoped_name("velocity"), vel_lines, velocity_color, influence_lim=False, alpha=alpha, ) self._avel_task = Draw3DLines( self.get_scoped_name("angular_velocity"), avel_lines, angular_velocity_color, influence_lim=False, alpha=alpha, ) self._com_trail_task = Draw3DTrail( self.get_scoped_name("com_trail"), self._com_pos, trail_color, marker_size=2, influence_lim=True, alpha=alpha, ) @property def name(self): return "3DSkeletonMotion" def update(self, frame_index=None, reset_trail=False, skeleton_motion=None) -> None: if skeleton_motion is not None: self._skeleton_motion = skeleton_motion curr_skeleton_motion = self._skeleton_motion.clone() if frame_index is not None: curr_skeleton_motion.tensor = curr_skeleton_motion.tensor[frame_index, :] if reset_trail: self._com_pos = curr_skeleton_motion.root_translation.numpy()[ np.newaxis, ... 
].repeat(self._trail_length, axis=0) else: self._com_pos = np.concatenate( ( curr_skeleton_motion.root_translation.numpy()[np.newaxis, ...], self._com_pos[:-1], ), axis=0, ) self._skeleton_state_task.update(curr_skeleton_motion) self._com_trail_task.update(self._com_pos) self._update(*Draw3DSkeletonMotion._get_vel_and_avel(curr_skeleton_motion)) @staticmethod def _get_vel_and_avel(skeleton_motion): """Get all the velocity and angular velocity lines """ pos = skeleton_motion.global_translation.numpy() vel = skeleton_motion.global_velocity.numpy() avel = skeleton_motion.global_angular_velocity.numpy() vel_lines = np.stack((pos, pos + vel * 0.02), axis=1) avel_lines = np.stack((pos, pos + avel * 0.01), axis=1) return vel_lines, avel_lines def _update(self, vel_lines, avel_lines) -> None: self._vel_task.update(vel_lines) self._avel_task.update(avel_lines) def __iter__(self): yield from self._skeleton_state_task yield from self._vel_task yield from self._avel_task yield from self._com_trail_task class Draw3DSkeletonMotions(BasePlotterTask): def __init__(self, skeleton_motion_tasks) -> None: self._skeleton_motion_tasks = skeleton_motion_tasks @property def name(self): return "3DSkeletonMotions" def update(self, frame_index) -> None: list(map(lambda x: x.update(frame_index), self._skeleton_motion_tasks)) def __iter__(self): yield from self._skeleton_state_tasks ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/tests/__init__.py ================================================ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. 
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/tests/test_plotter.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

# Manual/visual smoke test: builds two line segments and four dots, groups
# them into one composite task, and opens an interactive 3D plot window.
from typing import cast

import matplotlib.pyplot as plt
import numpy as np

from ..core import BasePlotterTask, BasePlotterTasks
from ..plt_plotter import Matplotlib3DPlotter
from ..simple_plotter_tasks import Draw3DDots, Draw3DLines

# Two vertical segments forming the sides of a unit square in the x=0 plane.
task = Draw3DLines(task_name="test",
                   lines=np.array([[[0, 0, 0], [0, 0, 1]], [[0, 1, 1], [0, 1, 0]]]),
                   color="blue")
# The four corners of the same square.
task2 = Draw3DDots(task_name="test2",
                   dots=np.array([[0, 0, 0], [0, 0, 1], [0, 1, 1], [0, 1, 0]]),
                   color="red")
task3 = BasePlotterTasks([task, task2])
# Matplotlib3DPlotter expects a single BasePlotterTask; the composite is
# cast to satisfy the annotation.
plotter = Matplotlib3DPlotter(cast(BasePlotterTask, task3))
plt.show()


================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/retarget_motion.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3.
Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from isaacgym.torch_utils import * import torch import json import numpy as np from poselib.core.rotation3d import * from poselib.skeleton.skeleton3d import SkeletonTree, SkeletonState, SkeletonMotion from poselib.visualization.common import plot_skeleton_state, plot_skeleton_motion_interactive """ This scripts shows how to retarget a motion clip from the source skeleton to a target skeleton. Data required for retargeting are stored in a retarget config dictionary as a json file. This file contains: - source_motion: a SkeletonMotion npy format representation of a motion sequence. The motion clip should use the same skeleton as the source T-Pose skeleton. 
- target_motion_path: path to save the retargeted motion to - source_tpose: a SkeletonState npy format representation of the source skeleton in it's T-Pose state - target_tpose: a SkeletonState npy format representation of the target skeleton in it's T-Pose state (pose should match source T-Pose) - joint_mapping: mapping of joint names from source to target - rotation: root rotation offset from source to target skeleton (for transforming across different orientation axes), represented as a quaternion in XYZW order. - scale: scale offset from source to target skeleton """ VISUALIZE = False def project_joints(motion): right_upper_arm_id = motion.skeleton_tree._node_indices["right_upper_arm"] right_lower_arm_id = motion.skeleton_tree._node_indices["right_lower_arm"] right_hand_id = motion.skeleton_tree._node_indices["right_hand"] left_upper_arm_id = motion.skeleton_tree._node_indices["left_upper_arm"] left_lower_arm_id = motion.skeleton_tree._node_indices["left_lower_arm"] left_hand_id = motion.skeleton_tree._node_indices["left_hand"] right_thigh_id = motion.skeleton_tree._node_indices["right_thigh"] right_shin_id = motion.skeleton_tree._node_indices["right_shin"] right_foot_id = motion.skeleton_tree._node_indices["right_foot"] left_thigh_id = motion.skeleton_tree._node_indices["left_thigh"] left_shin_id = motion.skeleton_tree._node_indices["left_shin"] left_foot_id = motion.skeleton_tree._node_indices["left_foot"] device = motion.global_translation.device # right arm right_upper_arm_pos = motion.global_translation[..., right_upper_arm_id, :] right_lower_arm_pos = motion.global_translation[..., right_lower_arm_id, :] right_hand_pos = motion.global_translation[..., right_hand_id, :] right_shoulder_rot = motion.local_rotation[..., right_upper_arm_id, :] right_elbow_rot = motion.local_rotation[..., right_lower_arm_id, :] right_arm_delta0 = right_upper_arm_pos - right_lower_arm_pos right_arm_delta1 = right_hand_pos - right_lower_arm_pos right_arm_delta0 = right_arm_delta0 
/ torch.norm(right_arm_delta0, dim=-1, keepdim=True) right_arm_delta1 = right_arm_delta1 / torch.norm(right_arm_delta1, dim=-1, keepdim=True) right_elbow_dot = torch.sum(-right_arm_delta0 * right_arm_delta1, dim=-1) right_elbow_dot = torch.clamp(right_elbow_dot, -1.0, 1.0) right_elbow_theta = torch.acos(right_elbow_dot) right_elbow_q = quat_from_angle_axis(-torch.abs(right_elbow_theta), torch.tensor(np.array([[0.0, 1.0, 0.0]]), device=device, dtype=torch.float32)) right_elbow_local_dir = motion.skeleton_tree.local_translation[right_hand_id] right_elbow_local_dir = right_elbow_local_dir / torch.norm(right_elbow_local_dir) right_elbow_local_dir_tile = torch.tile(right_elbow_local_dir.unsqueeze(0), [right_elbow_rot.shape[0], 1]) right_elbow_local_dir0 = quat_rotate(right_elbow_rot, right_elbow_local_dir_tile) right_elbow_local_dir1 = quat_rotate(right_elbow_q, right_elbow_local_dir_tile) right_arm_dot = torch.sum(right_elbow_local_dir0 * right_elbow_local_dir1, dim=-1) right_arm_dot = torch.clamp(right_arm_dot, -1.0, 1.0) right_arm_theta = torch.acos(right_arm_dot) right_arm_theta = torch.where(right_elbow_local_dir0[..., 1] <= 0, right_arm_theta, -right_arm_theta) right_arm_q = quat_from_angle_axis(right_arm_theta, right_elbow_local_dir.unsqueeze(0)) right_shoulder_rot = quat_mul(right_shoulder_rot, right_arm_q) # left arm left_upper_arm_pos = motion.global_translation[..., left_upper_arm_id, :] left_lower_arm_pos = motion.global_translation[..., left_lower_arm_id, :] left_hand_pos = motion.global_translation[..., left_hand_id, :] left_shoulder_rot = motion.local_rotation[..., left_upper_arm_id, :] left_elbow_rot = motion.local_rotation[..., left_lower_arm_id, :] left_arm_delta0 = left_upper_arm_pos - left_lower_arm_pos left_arm_delta1 = left_hand_pos - left_lower_arm_pos left_arm_delta0 = left_arm_delta0 / torch.norm(left_arm_delta0, dim=-1, keepdim=True) left_arm_delta1 = left_arm_delta1 / torch.norm(left_arm_delta1, dim=-1, keepdim=True) left_elbow_dot = 
torch.sum(-left_arm_delta0 * left_arm_delta1, dim=-1) left_elbow_dot = torch.clamp(left_elbow_dot, -1.0, 1.0) left_elbow_theta = torch.acos(left_elbow_dot) left_elbow_q = quat_from_angle_axis(-torch.abs(left_elbow_theta), torch.tensor(np.array([[0.0, 1.0, 0.0]]), device=device, dtype=torch.float32)) left_elbow_local_dir = motion.skeleton_tree.local_translation[left_hand_id] left_elbow_local_dir = left_elbow_local_dir / torch.norm(left_elbow_local_dir) left_elbow_local_dir_tile = torch.tile(left_elbow_local_dir.unsqueeze(0), [left_elbow_rot.shape[0], 1]) left_elbow_local_dir0 = quat_rotate(left_elbow_rot, left_elbow_local_dir_tile) left_elbow_local_dir1 = quat_rotate(left_elbow_q, left_elbow_local_dir_tile) left_arm_dot = torch.sum(left_elbow_local_dir0 * left_elbow_local_dir1, dim=-1) left_arm_dot = torch.clamp(left_arm_dot, -1.0, 1.0) left_arm_theta = torch.acos(left_arm_dot) left_arm_theta = torch.where(left_elbow_local_dir0[..., 1] <= 0, left_arm_theta, -left_arm_theta) left_arm_q = quat_from_angle_axis(left_arm_theta, left_elbow_local_dir.unsqueeze(0)) left_shoulder_rot = quat_mul(left_shoulder_rot, left_arm_q) # right leg right_thigh_pos = motion.global_translation[..., right_thigh_id, :] right_shin_pos = motion.global_translation[..., right_shin_id, :] right_foot_pos = motion.global_translation[..., right_foot_id, :] right_hip_rot = motion.local_rotation[..., right_thigh_id, :] right_knee_rot = motion.local_rotation[..., right_shin_id, :] right_leg_delta0 = right_thigh_pos - right_shin_pos right_leg_delta1 = right_foot_pos - right_shin_pos right_leg_delta0 = right_leg_delta0 / torch.norm(right_leg_delta0, dim=-1, keepdim=True) right_leg_delta1 = right_leg_delta1 / torch.norm(right_leg_delta1, dim=-1, keepdim=True) right_knee_dot = torch.sum(-right_leg_delta0 * right_leg_delta1, dim=-1) right_knee_dot = torch.clamp(right_knee_dot, -1.0, 1.0) right_knee_theta = torch.acos(right_knee_dot) right_knee_q = quat_from_angle_axis(torch.abs(right_knee_theta), 
torch.tensor(np.array([[0.0, 1.0, 0.0]]), device=device, dtype=torch.float32)) right_knee_local_dir = motion.skeleton_tree.local_translation[right_foot_id] right_knee_local_dir = right_knee_local_dir / torch.norm(right_knee_local_dir) right_knee_local_dir_tile = torch.tile(right_knee_local_dir.unsqueeze(0), [right_knee_rot.shape[0], 1]) right_knee_local_dir0 = quat_rotate(right_knee_rot, right_knee_local_dir_tile) right_knee_local_dir1 = quat_rotate(right_knee_q, right_knee_local_dir_tile) right_leg_dot = torch.sum(right_knee_local_dir0 * right_knee_local_dir1, dim=-1) right_leg_dot = torch.clamp(right_leg_dot, -1.0, 1.0) right_leg_theta = torch.acos(right_leg_dot) right_leg_theta = torch.where(right_knee_local_dir0[..., 1] >= 0, right_leg_theta, -right_leg_theta) right_leg_q = quat_from_angle_axis(right_leg_theta, right_knee_local_dir.unsqueeze(0)) right_hip_rot = quat_mul(right_hip_rot, right_leg_q) # left leg left_thigh_pos = motion.global_translation[..., left_thigh_id, :] left_shin_pos = motion.global_translation[..., left_shin_id, :] left_foot_pos = motion.global_translation[..., left_foot_id, :] left_hip_rot = motion.local_rotation[..., left_thigh_id, :] left_knee_rot = motion.local_rotation[..., left_shin_id, :] left_leg_delta0 = left_thigh_pos - left_shin_pos left_leg_delta1 = left_foot_pos - left_shin_pos left_leg_delta0 = left_leg_delta0 / torch.norm(left_leg_delta0, dim=-1, keepdim=True) left_leg_delta1 = left_leg_delta1 / torch.norm(left_leg_delta1, dim=-1, keepdim=True) left_knee_dot = torch.sum(-left_leg_delta0 * left_leg_delta1, dim=-1) left_knee_dot = torch.clamp(left_knee_dot, -1.0, 1.0) left_knee_theta = torch.acos(left_knee_dot) left_knee_q = quat_from_angle_axis(torch.abs(left_knee_theta), torch.tensor(np.array([[0.0, 1.0, 0.0]]), device=device, dtype=torch.float32)) left_knee_local_dir = motion.skeleton_tree.local_translation[left_foot_id] left_knee_local_dir = left_knee_local_dir / torch.norm(left_knee_local_dir) left_knee_local_dir_tile = 
torch.tile(left_knee_local_dir.unsqueeze(0), [left_knee_rot.shape[0], 1])
    # Rotate the knee->foot local direction by both the original knee rotation and
    # the projected 1-DoF knee rotation; the angle between them is the residual twist.
    left_knee_local_dir0 = quat_rotate(left_knee_rot, left_knee_local_dir_tile)
    left_knee_local_dir1 = quat_rotate(left_knee_q, left_knee_local_dir_tile)
    left_leg_dot = torch.sum(left_knee_local_dir0 * left_knee_local_dir1, dim=-1)
    left_leg_dot = torch.clamp(left_leg_dot, -1.0, 1.0)  # guard acos against fp drift
    left_leg_theta = torch.acos(left_leg_dot)
    left_leg_theta = torch.where(left_knee_local_dir0[..., 1] >= 0, left_leg_theta, -left_leg_theta)
    left_leg_q = quat_from_angle_axis(left_leg_theta, left_knee_local_dir.unsqueeze(0))
    # Fold the residual twist back into the hip so the pose is preserved.
    left_hip_rot = quat_mul(left_hip_rot, left_leg_q)

    # Write the projected 1-DoF elbow/knee rotations (and compensated
    # shoulder/hip rotations) back into a copy of the local rotations.
    new_local_rotation = motion.local_rotation.clone()
    new_local_rotation[..., right_upper_arm_id, :] = right_shoulder_rot
    new_local_rotation[..., right_lower_arm_id, :] = right_elbow_q
    new_local_rotation[..., left_upper_arm_id, :] = left_shoulder_rot
    new_local_rotation[..., left_lower_arm_id, :] = left_elbow_q
    new_local_rotation[..., right_thigh_id, :] = right_hip_rot
    new_local_rotation[..., right_shin_id, :] = right_knee_q
    new_local_rotation[..., left_thigh_id, :] = left_hip_rot
    new_local_rotation[..., left_shin_id, :] = left_knee_q
    # Hands are zeroed out entirely (identity rotation).
    new_local_rotation[..., left_hand_id, :] = quat_identity([1])
    new_local_rotation[..., right_hand_id, :] = quat_identity([1])

    new_sk_state = SkeletonState.from_rotation_and_root_translation(motion.skeleton_tree, new_local_rotation,
                                                                    motion.root_translation, is_local=True)
    new_motion = SkeletonMotion.from_skeleton_state(new_sk_state, fps=motion.fps)
    return new_motion


def main():
    """Retarget a CMU mocap clip onto the AMP humanoid skeleton.

    Pipeline: load the retarget config, load source/target t-poses and the
    source motion, retarget via ``retarget_to_by_tpose``, trim frames, project
    elbows/knees to 1 DoF, ground the feet, then save (and optionally
    visualize) the result. Paths are relative to the working directory.
    """
    # load retarget config
    retarget_data_path = "data/configs/retarget_cmu_to_amp.json"
    with open(retarget_data_path) as f:
        retarget_data = json.load(f)

    # load and visualize t-pose files
    source_tpose = SkeletonState.from_file(retarget_data["source_tpose"])
    if VISUALIZE:
        plot_skeleton_state(source_tpose)

    target_tpose = SkeletonState.from_file(retarget_data["target_tpose"])
    if VISUALIZE:
        plot_skeleton_state(target_tpose)

    # load and visualize source motion sequence
    source_motion = SkeletonMotion.from_file(retarget_data["source_motion"])
    if VISUALIZE:
        plot_skeleton_motion_interactive(source_motion)

    # parse data from retarget config
    joint_mapping = retarget_data["joint_mapping"]
    rotation_to_target_skeleton = torch.tensor(retarget_data["rotation"])

    # run retargeting
    target_motion = source_motion.retarget_to_by_tpose(
        joint_mapping=retarget_data["joint_mapping"],
        source_tpose=source_tpose,
        target_tpose=target_tpose,
        rotation_to_target_skeleton=rotation_to_target_skeleton,
        scale_to_target_skeleton=retarget_data["scale"]
    )

    # keep frames between [trim_frame_beg, trim_frame_end - 1]
    # (-1 in the config means "no trimming" on that end)
    frame_beg = retarget_data["trim_frame_beg"]
    frame_end = retarget_data["trim_frame_end"]
    if (frame_beg == -1):
        frame_beg = 0

    if (frame_end == -1):
        frame_end = target_motion.local_rotation.shape[0]

    local_rotation = target_motion.local_rotation
    root_translation = target_motion.root_translation
    local_rotation = local_rotation[frame_beg:frame_end, ...]
    root_translation = root_translation[frame_beg:frame_end, ...]

    new_sk_state = SkeletonState.from_rotation_and_root_translation(target_motion.skeleton_tree, local_rotation,
                                                                    root_translation, is_local=True)
    target_motion = SkeletonMotion.from_skeleton_state(new_sk_state, fps=target_motion.fps)

    # need to convert some joints from 3D to 1D (e.g. elbows and knees)
    target_motion = project_joints(target_motion)

    # move the root so that the feet are on the ground
    # (shift the whole clip by the global minimum height over all bodies/frames)
    local_rotation = target_motion.local_rotation
    root_translation = target_motion.root_translation
    tar_global_pos = target_motion.global_translation
    min_h = torch.min(tar_global_pos[..., 2])
    root_translation[:, 2] += -min_h

    # adjust the height of the root to avoid ground penetration
    root_height_offset = retarget_data["root_height_offset"]
    root_translation[:, 2] += root_height_offset

    new_sk_state = SkeletonState.from_rotation_and_root_translation(target_motion.skeleton_tree, local_rotation,
                                                                    root_translation, is_local=True)
    target_motion = SkeletonMotion.from_skeleton_state(new_sk_state, fps=target_motion.fps)

    # save retargeted motion
    target_motion.to_file(retarget_data["target_motion_path"])

    # visualize retargeted motion
    plot_skeleton_motion_interactive(target_motion)

    return


if __name__ == '__main__':
    main()

================================================
FILE: timechamber/tasks/base/__init__.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: timechamber/tasks/base/ma_vec_task.py ================================================ # Copyright (c) 2018-2021, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from typing import Dict, Any, Tuple

import gym
from gym import spaces

from isaacgym import gymtorch, gymapi
from isaacgym.torch_utils import to_torch
from isaacgym.gymutil import get_property_setter_map, get_property_getter_map, get_default_setter_args, \
    apply_random_samples, check_buckets, generate_random_samples

import torch
import numpy as np
import operator, random
from copy import deepcopy

import sys

import abc
from .vec_task import Env


class MA_VecTask(Env):
    """Multi-agent vectorized task base.

    Extends `Env` with Isaac Gym sim creation, a viewer, domain-randomization
    bookkeeping, and per-agent buffers: observation/reward/reset/progress
    buffers are all sized ``num_envs * num_agents`` (see `allocate_buffers`).
    Concrete tasks implement `pre_physics_step` / `post_physics_step` and
    (apparently) `create_sim` / `reset_idx` / `compute_observations` —
    NOTE(review): `__init__` calls `self.create_sim()` with no arguments while
    the `create_sim` defined below takes four, so subclasses are evidently
    expected to override it; confirm against the task classes.
    """

    def __init__(self, config, rl_device, sim_device, graphics_device_id, headless,
                 virtual_screen_capture: bool = False, force_render: bool = False):
        """Initialise the `MA_VecTask`.

        Args:
            config: config dictionary for the environment.
            sim_device: the device to simulate physics on. eg. 'cuda:0' or 'cpu'
            graphics_device_id: the device ID to render with.
            headless: Set to False to disable viewer rendering.
            virtual_screen_capture: stored as-is; not used elsewhere in this class.
            force_render: when True, `step` renders the viewer every physics substep.
        """
        super().__init__(config, rl_device, sim_device, graphics_device_id, headless)
        self.virtual_screen_capture = virtual_screen_capture
        self.force_render = force_render

        # Build SimParams from the task config (dt, substeps, up-axis, physx/flex opts).
        self.sim_params = self.__parse_sim_params(self.cfg["physics_engine"], self.cfg["sim"])
        if self.cfg["physics_engine"] == "physx":
            self.physics_engine = gymapi.SIM_PHYSX
        elif self.cfg["physics_engine"] == "flex":
            self.physics_engine = gymapi.SIM_FLEX
        else:
            msg = f"Invalid physics engine backend: {self.cfg['physics_engine']}"
            raise ValueError(msg)

        # optimization flags for pytorch JIT
        torch._C._jit_set_profiling_mode(False)
        torch._C._jit_set_profiling_executor(False)

        self.gym = gymapi.acquire_gym()

        # Domain-randomization state (see apply_randomizations).
        self.first_randomization = True
        self.original_props = {}
        self.dr_randomizations = {}
        self.actor_params_generator = None
        self.extern_actor_params = {}
        self.last_step = -1
        self.last_rand_step = -1
        for env_id in range(self.num_envs):
            self.extern_actor_params[env_id] = None

        # create envs, sim and viewer
        self.sim_initialized = False
        self.create_sim()
        self.gym.prepare_sim(self.sim)
        self.sim_initialized = True

        self.set_viewer()
        self.allocate_buffers()

        self.obs_dict = {}

    def set_viewer(self):
        """Create the viewer."""

        # todo: read from config
        self.enable_viewer_sync = True
        self.viewer = None

        # if running with a viewer, set up keyboard shortcuts and camera
        if self.headless == False:
            # subscribe to keyboard shortcuts
            self.viewer = self.gym.create_viewer(
                self.sim, gymapi.CameraProperties())
            self.gym.subscribe_viewer_keyboard_event(
                self.viewer, gymapi.KEY_ESCAPE, "QUIT")
            self.gym.subscribe_viewer_keyboard_event(
                self.viewer, gymapi.KEY_V, "toggle_viewer_sync")

            # set the camera position based on up axis
            sim_params = self.gym.get_sim_params(self.sim)
            if sim_params.up_axis == gymapi.UP_AXIS_Z:
                cam_pos = gymapi.Vec3(20.0, 25.0, 3.0)
                cam_target = gymapi.Vec3(10.0, 15.0, 0.0)
            else:
                cam_pos = gymapi.Vec3(20.0, 3.0, 25.0)
                cam_target = gymapi.Vec3(10.0, 0.0, 15.0)

            self.gym.viewer_camera_look_at(
                self.viewer, None, cam_pos,
                cam_target)

    def allocate_buffers(self):
        """Allocate the observation, states, etc. buffers.

        These are what is used to set observations and states in the environment classes which
        inherit from this one, and are read in `step` and other related functions.

        Per-agent buffers (obs/rew/reset/timeout/progress/randomize) are sized
        ``num_envs * num_agents``; the (privileged) state buffer is per-env only.
        """

        # allocate buffers
        self.obs_buf = torch.zeros(
            (self.num_envs * self.num_agents, self.num_obs), device=self.device, dtype=torch.float)
        self.states_buf = torch.zeros(
            (self.num_envs, self.num_states), device=self.device, dtype=torch.float)
        self.rew_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.float)
        # reset_buf starts at ones: every env is reset on the first step
        self.reset_buf = torch.ones(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        self.timeout_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        self.progress_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        self.randomize_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        self.extras = {}

    def set_sim_params_up_axis(self, sim_params: gymapi.SimParams, axis: str) -> int:
        """Set gravity based on up axis and return axis index.

        Args:
            sim_params: sim params to modify the axis for.
            axis: axis to set sim params for.
        Returns:
            axis index for up axis (2 for 'z', otherwise 1; gravity only set for 'z').
        """
        if axis == 'z':
            sim_params.up_axis = gymapi.UP_AXIS_Z
            sim_params.gravity.x = 0
            sim_params.gravity.y = 0
            sim_params.gravity.z = -9.81
            return 2
        return 1

    def create_sim(self, compute_device: int, graphics_device: int, physics_engine, sim_params: gymapi.SimParams):
        """Create an Isaac Gym sim object.

        NOTE(review): `__init__` invokes `self.create_sim()` with no arguments,
        so subclasses presumably override this method — confirm against task classes.

        Args:
            compute_device: ID of compute device to use.
            graphics_device: ID of graphics device to use.
            physics_engine: physics engine to use (`gymapi.SIM_PHYSX` or `gymapi.SIM_FLEX`)
            sim_params: sim params to use.
        Returns:
            the Isaac Gym sim object.
""" sim = self.gym.create_sim(compute_device, graphics_device, physics_engine, sim_params) if sim is None: print("*** Failed to create sim") quit() return sim def get_state(self): """Returns the state buffer of the environment (the priviledged observations for asymmetric training).""" return torch.clamp(self.states_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) @abc.abstractmethod def pre_physics_step(self, actions: torch.Tensor): """Apply the actions to the environment (eg by setting torques, position targets). Args: actions: the actions to apply """ @abc.abstractmethod def post_physics_step(self): """Compute reward and observations, reset any environments that require it.""" def step(self, actions: torch.Tensor) -> Tuple[Dict[str, torch.Tensor], torch.Tensor, torch.Tensor, Dict[str, Any]]: """Step the physics of the environment. Args: actions: actions to apply Returns: Observations, rewards, resets, info Observations are dict of observations (currently only one member called 'obs') """ # randomize actions if self.dr_randomizations.get('actions', None): actions = self.dr_randomizations['actions']['noise_lambda'](actions) # apply actions self.pre_physics_step(actions) # step physics and render each frame for i in range(self.control_freq_inv): if self.force_render: self.render() self.gym.simulate(self.sim) # to fix! if self.device == 'cpu': self.gym.fetch_results(self.sim, True) # fill time out buffer self.timeout_buf = torch.where(self.progress_buf >= self.max_episode_length - 1, torch.ones_like(self.timeout_buf), torch.zeros_like(self.timeout_buf)) # compute observations, rewards, resets, ... self.post_physics_step() # randomize observations if self.dr_randomizations.get('observations', None): self.obs_buf = self.dr_randomizations['observations']['noise_lambda'](self.obs_buf) self.extras["time_outs"] = self.timeout_buf.to(self.rl_device) return def zero_actions(self) -> torch.Tensor: """Returns a buffer with zero actions. 
Returns: A buffer of zero torch actions """ actions = torch.zeros([self.num_envs * self.num_agents, self.num_actions], dtype=torch.float32, device=self.rl_device) return actions def reset(self, env_ids=None) -> torch.Tensor: """Reset the environment. """ if (env_ids is None): # zero_actions = self.zero_actions() # self.step(zero_actions) env_ids = to_torch(np.arange(self.num_envs), device=self.device, dtype=torch.long) self.reset_idx(env_ids) self.compute_observations() self.pos_before = self.obs_buf[:self.num_envs, :2].clone() else: self._reset_envs(env_ids=env_ids) return def _reset_envs(self, env_ids): if (len(env_ids) > 0): self.reset_idx(env_ids) self.compute_observations() self.pos_before = self.obs_buf[:self.num_envs, :2].clone() return def reset_done(self): """Reset the environment. Returns: Observation dictionary, indices of environments being reset """ done_env_ids = self.reset_buf.nonzero(as_tuple=False).flatten() if len(done_env_ids) > 0: self.reset_idx(done_env_ids) self.obs_dict["obs"] = torch.clamp(self.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) # asymmetric actor-critic if self.num_states > 0: self.obs_dict["states"] = self.get_state() return self.obs_dict, done_env_ids def render(self): """Draw the frame to the viewer, and check for keyboard events.""" if self.viewer: # check for window closed if self.gym.query_viewer_has_closed(self.viewer): sys.exit() # check for keyboard events for evt in self.gym.query_viewer_action_events(self.viewer): if evt.action == "QUIT" and evt.value > 0: sys.exit() elif evt.action == "toggle_viewer_sync" and evt.value > 0: self.enable_viewer_sync = not self.enable_viewer_sync # fetch results if self.device != 'cpu': self.gym.fetch_results(self.sim, True) # step graphics if self.enable_viewer_sync: self.gym.step_graphics(self.sim) self.gym.draw_viewer(self.viewer, self.sim, True) # Wait for dt to elapse in real time. # This synchronizes the physics simulation with the rendering rate. 
self.gym.sync_frame_time(self.sim) else: self.gym.poll_viewer_events(self.viewer) def __parse_sim_params(self, physics_engine: str, config_sim: Dict[str, Any]) -> gymapi.SimParams: """Parse the config dictionary for physics stepping settings. Args: physics_engine: which physics engine to use. "physx" or "flex" config_sim: dict of sim configuration parameters Returns IsaacGym SimParams object with updated settings. """ sim_params = gymapi.SimParams() # check correct up-axis if config_sim["up_axis"] not in ["z", "y"]: msg = f"Invalid physics up-axis: {config_sim['up_axis']}" print(msg) raise ValueError(msg) # assign general sim parameters sim_params.dt = config_sim["dt"] sim_params.num_client_threads = config_sim.get("num_client_threads", 0) sim_params.use_gpu_pipeline = config_sim["use_gpu_pipeline"] sim_params.substeps = config_sim.get("substeps", 2) # assign up-axis if config_sim["up_axis"] == "z": sim_params.up_axis = gymapi.UP_AXIS_Z else: sim_params.up_axis = gymapi.UP_AXIS_Y # assign gravity sim_params.gravity = gymapi.Vec3(*config_sim["gravity"]) # configure physics parameters if physics_engine == "physx": # set the parameters if "physx" in config_sim: for opt in config_sim["physx"].keys(): if opt == "contact_collection": setattr(sim_params.physx, opt, gymapi.ContactCollection(config_sim["physx"][opt])) else: setattr(sim_params.physx, opt, config_sim["physx"][opt]) else: # set the parameters if "flex" in config_sim: for opt in config_sim["flex"].keys(): setattr(sim_params.flex, opt, config_sim["flex"][opt]) # return the configured params return sim_params """ Domain Randomization methods """ def get_actor_params_info(self, dr_params: Dict[str, Any], env): """Generate a flat array of actor params, their names and ranges. 
        Returns:
            The array (params, names, lows, highs), or None when the DR config
            has no "actor_params" section. Non-uniform distributions get
            (-Inf, Inf) bounds.
        """
        if "actor_params" not in dr_params:
            return None
        params = []
        names = []
        lows = []
        highs = []
        param_getters_map = get_property_getter_map(self.gym)
        for actor, actor_properties in dr_params["actor_params"].items():
            handle = self.gym.find_actor_handle(env, actor)
            for prop_name, prop_attrs in actor_properties.items():
                if prop_name == 'color':
                    continue  # this is set randomly
                props = param_getters_map[prop_name](env, handle)
                if not isinstance(props, list):
                    props = [props]
                for prop_idx, prop in enumerate(props):
                    for attr, attr_randomization_params in prop_attrs.items():
                        name = prop_name + '_' + str(prop_idx) + '_' + attr
                        lo_hi = attr_randomization_params['range']
                        distr = attr_randomization_params['distribution']
                        if 'uniform' not in distr:
                            lo_hi = (-1.0 * float('Inf'), float('Inf'))
                        if isinstance(prop, np.ndarray):
                            # array-valued attribute: one entry per element
                            for attr_idx in range(prop[attr].shape[0]):
                                params.append(prop[attr][attr_idx])
                                names.append(name + '_' + str(attr_idx))
                                lows.append(lo_hi[0])
                                highs.append(lo_hi[1])
                        else:
                            params.append(getattr(prop, attr))
                            names.append(name)
                            lows.append(lo_hi[0])
                            highs.append(lo_hi[1])
        return params, names, lows, highs

    def apply_randomizations(self, dr_params):
        """Apply domain randomizations to the environment.

        Note that currently we can only apply randomizations only on resets, due to current PhysX limitations

        Args:
            dr_params: parameters for domain randomization to use.
        """
        # If we don't have a randomization frequency, randomize every step
        rand_freq = dr_params.get("frequency", 1)

        # First, determine what to randomize:
        #   - non-environment parameters when > frequency steps have passed since the last non-environment
        #   - physical environments in the reset buffer, which have exceeded the randomization frequency threshold
        #   - on the first call, randomize everything
        self.last_step = self.gym.get_frame_count(self.sim)
        if self.first_randomization:
            do_nonenv_randomize = True
            env_ids = list(range(self.num_envs))
        else:
            do_nonenv_randomize = (self.last_step - self.last_rand_step) >= rand_freq
            rand_envs = torch.where(self.randomize_buf >= rand_freq, torch.ones_like(self.randomize_buf),
                                    torch.zeros_like(self.randomize_buf))
            # only envs that are both due for randomization AND being reset
            rand_envs = torch.logical_and(rand_envs, self.reset_buf)
            env_ids = torch.nonzero(rand_envs, as_tuple=False).squeeze(-1).tolist()
            self.randomize_buf[rand_envs] = 0

        if do_nonenv_randomize:
            self.last_rand_step = self.last_step

        param_setters_map = get_property_setter_map(self.gym)
        param_setter_defaults_map = get_default_setter_args(self.gym)
        param_getters_map = get_property_getter_map(self.gym)

        # On first iteration, check the number of buckets
        if self.first_randomization:
            check_buckets(self.gym, self.envs, dr_params)

        # --- observation/action noise: build a noise_lambda closure per stream ---
        for nonphysical_param in ["observations", "actions"]:
            if nonphysical_param in dr_params and do_nonenv_randomize:
                dist = dr_params[nonphysical_param]["distribution"]
                op_type = dr_params[nonphysical_param]["operation"]
                sched_type = dr_params[nonphysical_param]["schedule"] if "schedule" in dr_params[
                    nonphysical_param] else None
                sched_step = dr_params[nonphysical_param]["schedule_steps"] if "schedule" in dr_params[
                    nonphysical_param] else None
                op = operator.add if op_type == 'additive' else operator.mul

                # schedule scaling ramps noise in over sched_step frames (linear)
                # or switches it on after sched_step frames (constant)
                if sched_type == 'linear':
                    sched_scaling = 1.0 / sched_step * \
                                    min(self.last_step, sched_step)
                elif sched_type == 'constant':
                    sched_scaling = 0 if self.last_step < sched_step else 1
                else:
                    sched_scaling = 1

                if dist == 'gaussian':
                    mu, var = dr_params[nonphysical_param]["range"]
                    mu_corr, var_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])

                    if op_type == 'additive':
                        mu *= sched_scaling
                        var *= sched_scaling
                        mu_corr *= sched_scaling
                        var_corr *= sched_scaling
                    elif op_type == 'scaling':
                        var = var * sched_scaling  # scale up var over time
                        mu = mu * sched_scaling + 1.0 * \
                             (1.0 - sched_scaling)  # linearly interpolate
                        var_corr = var_corr * sched_scaling  # scale up var over time
                        mu_corr = mu_corr * sched_scaling + 1.0 * \
                                  (1.0 - sched_scaling)  # linearly interpolate

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        # 'corr' is drawn once and cached: correlated noise is
                        # constant across calls, uncorrelated noise is fresh each call
                        params = self.dr_randomizations[param_name]
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * params['var_corr'] + params['mu_corr']
                        return op(
                            tensor, corr + torch.randn_like(tensor) * params['var'] + params['mu'])

                    self.dr_randomizations[nonphysical_param] = {'mu': mu, 'var': var, 'mu_corr': mu_corr,
                                                                 'var_corr': var_corr, 'noise_lambda': noise_lambda}

                elif dist == 'uniform':
                    lo, hi = dr_params[nonphysical_param]["range"]
                    lo_corr, hi_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])

                    if op_type == 'additive':
                        lo *= sched_scaling
                        hi *= sched_scaling
                        lo_corr *= sched_scaling
                        hi_corr *= sched_scaling
                    elif op_type == 'scaling':
                        lo = lo * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi = hi * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        lo_corr = lo_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi_corr = hi_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        # same caching scheme as the gaussian branch above
                        params = self.dr_randomizations[param_name]
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * (params['hi_corr'] - params['lo_corr']) + params['lo_corr']
                        return op(tensor, corr + torch.rand_like(tensor) * (params['hi'] - params['lo']) + params['lo'])

                    self.dr_randomizations[nonphysical_param] = {'lo': lo, 'hi': hi, 'lo_corr': lo_corr,
                                                                 'hi_corr': hi_corr, 'noise_lambda': noise_lambda}

        # --- sim-level parameter randomization ---
        if "sim_params" in dr_params and do_nonenv_randomize:
            prop_attrs = dr_params["sim_params"]
            prop = self.gym.get_sim_params(self.sim)
            if self.first_randomization:
                # snapshot originals so randomization is always relative to the base values
                self.original_props["sim_params"] = {
                    attr: getattr(prop, attr) for attr in dir(prop)}
            for attr, attr_randomization_params in prop_attrs.items():
                apply_random_samples(
                    prop, self.original_props["sim_params"], attr, attr_randomization_params, self.last_step)
            self.gym.set_sim_params(self.sim, prop)

        # If self.actor_params_generator is initialized: use it to
        # sample actor simulation params. This gives users the
        # freedom to generate samples from arbitrary distributions,
        # e.g. use full-covariance distributions instead of the DR's
        # default of treating each simulation parameter independently.
        extern_offsets = {}
        if self.actor_params_generator is not None:
            for env_id in env_ids:
                self.extern_actor_params[env_id] = \
                    self.actor_params_generator.sample()
                extern_offsets[env_id] = 0

        # --- per-actor property randomization for each env due for randomization ---
        for actor, actor_properties in dr_params["actor_params"].items():
            for env_id in env_ids:
                env = self.envs[env_id]
                handle = self.gym.find_actor_handle(env, actor)
                extern_sample = self.extern_actor_params[env_id]

                for prop_name, prop_attrs in actor_properties.items():
                    if prop_name == 'color':
                        num_bodies = self.gym.get_actor_rigid_body_count(
                            env, handle)
                        for n in range(num_bodies):
                            self.gym.set_rigid_body_color(env, handle, n, gymapi.MESH_VISUAL,
                                                          gymapi.Vec3(random.uniform(0, 1), random.uniform(0, 1),
                                                                      random.uniform(0, 1)))
                        continue
                    if prop_name == 'scale':
                        # 'setup_only' attrs are applied only before the sim is prepared
                        setup_only = prop_attrs.get('setup_only', False)
                        if (setup_only and not self.sim_initialized) or not setup_only:
                            attr_randomization_params = prop_attrs
                            sample = generate_random_samples(attr_randomization_params, 1,
                                                             self.last_step, None)
                            og_scale = 1
                            if attr_randomization_params['operation'] == 'scaling':
                                new_scale = og_scale * sample
                            elif attr_randomization_params['operation'] == 'additive':
                                new_scale = og_scale + sample
                            self.gym.set_actor_scale(env, handle, new_scale)
                        continue

                    prop = param_getters_map[prop_name](env, handle)
                    set_random_properties = True
                    if isinstance(prop, list):
                        if self.first_randomization:
                            self.original_props[prop_name] = [
                                {attr: getattr(p, attr) for attr in dir(p)} for p in prop]
                        for p, og_p in zip(prop, self.original_props[prop_name]):
                            for attr, attr_randomization_params in prop_attrs.items():
                                setup_only = attr_randomization_params.get('setup_only', False)
                                if (setup_only and not self.sim_initialized) or not setup_only:
                                    smpl = None
                                    if self.actor_params_generator is not None:
                                        smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                            extern_sample, extern_offsets[env_id], p, attr)
                                    apply_random_samples(
                                        p, og_p, attr, attr_randomization_params,
                                        self.last_step, smpl)
                                else:
                                    set_random_properties = False
                    else:
                        if self.first_randomization:
                            self.original_props[prop_name] = deepcopy(prop)
                        for attr, attr_randomization_params in prop_attrs.items():
                            setup_only = attr_randomization_params.get('setup_only', False)
                            if (setup_only and not self.sim_initialized) or not setup_only:
                                smpl = None
                                if self.actor_params_generator is not None:
                                    smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                        extern_sample, extern_offsets[env_id], prop, attr)
                                apply_random_samples(
                                    prop, self.original_props[prop_name], attr,
                                    attr_randomization_params, self.last_step, smpl)
                            else:
                                set_random_properties = False

                    if set_random_properties:
                        setter = param_setters_map[prop_name]
                        default_args = param_setter_defaults_map[prop_name]
                        setter(env, handle, prop, *default_args)

        if self.actor_params_generator is not None:
            for env_id in env_ids:
                # check that we used all dims in sample
                if extern_offsets[env_id] > 0:
                    extern_sample = self.extern_actor_params[env_id]
                    if extern_offsets[env_id] != extern_sample.shape[0]:
                        print('env_id', env_id,
                              'extern_offset', extern_offsets[env_id],
                              'vs extern_sample.shape', extern_sample.shape)
                        raise Exception("Invalid extern_sample size")

        self.first_randomization = False

================================================
FILE: timechamber/tasks/base/vec_task.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from typing import Dict, Any, Tuple
import gym
from gym import spaces

from isaacgym import gymtorch, gymapi
from isaacgym.torch_utils import to_torch
from isaacgym.gymutil import get_property_setter_map, get_property_getter_map, get_default_setter_args, apply_random_samples, check_buckets, generate_random_samples

import torch
import numpy as np
import operator, random
from copy import deepcopy

import sys

import abc
from abc import ABC

# Handle of the single shared Isaac Gym sim; `_create_sim_once` caches it here.
EXISTING_SIM = None
# Size of the virtual display used for off-screen frame capture.
SCREEN_CAPTURE_RESOLUTION = (1027, 768)


def _create_sim_once(gym, *args, **kwargs):
    """Create the Isaac Gym sim on the first call; every later call returns
    the cached handle unchanged (Isaac Gym supports only one sim per process).
    """
    global EXISTING_SIM
    if EXISTING_SIM is None:
        EXISTING_SIM = gym.create_sim(*args, **kwargs)
    return EXISTING_SIM


class Env(ABC):
    """Abstract vectorized-environment base: parses device/config settings and
    exposes the gym spaces plus buffer-allocation / step / reset hooks."""

    def __init__(self, config: Dict[str, Any], rl_device: str, sim_device: str, graphics_device_id: int,
                 headless: bool):
        """Initialise the env.

        Args:
            config: the configuration dictionary.
            rl_device: device used by the RL algorithm for returned tensors.
            sim_device: the device to simulate physics on. eg. 'cuda:0' or 'cpu'
            graphics_device_id: the device ID to render with.
            headless: Set to False to disable viewer rendering.
        """
        # "cuda:0" -> ("cuda", 0); a bare "cpu"/"cuda" defaults to device 0.
        device_spec = sim_device.split(":")
        self.device_type = device_spec[0]
        self.device_id = int(device_spec[1]) if len(device_spec) > 1 else 0

        self.device = "cpu"
        if config["sim"]["use_gpu_pipeline"]:
            # The GPU pipeline only works when physics itself runs on the GPU;
            # otherwise fall back to CPU and patch the config in place.
            if self.device_type.lower() in ("cuda", "gpu"):
                self.device = f"cuda:{self.device_id}"
            else:
                print("GPU Pipeline can only be used with GPU simulation. Forcing CPU Pipeline.")
                config["sim"]["use_gpu_pipeline"] = False

        self.rl_device = rl_device

        # Rendering
        # if training in a headless mode
        self.headless = headless

        enable_camera_sensors = config.get("enableCameraSensors", False)
        self.graphics_device_id = graphics_device_id
        # No viewer and no camera sensors -> rendering is never needed,
        # so disable the graphics device entirely.
        if enable_camera_sensors == False and self.headless == True:
            self.graphics_device_id = -1

        self.num_environments = config["env"]["numEnvs"]
        self.num_agents = config["env"].get("numAgents", 1)  # used for multi-agent environments
        self.num_observations = config["env"]["numObservations"]
        self.num_states = config["env"].get("numStates", 0)
        self.num_actions = config["env"]["numActions"]
        self.control_freq_inv = config["env"].get("controlFrequencyInv", 1)

        # Unbounded observation/state boxes; actions are clipped to [-1, 1].
        self.obs_space = spaces.Box(np.ones(self.num_obs) * -np.Inf, np.ones(self.num_obs) * np.Inf)
        self.state_space = spaces.Box(np.ones(self.num_states) * -np.Inf, np.ones(self.num_states) * np.Inf)
        self.act_space = spaces.Box(np.ones(self.num_actions) * -1., np.ones(self.num_actions) * 1.)

        self.clip_obs = config["env"].get("clipObservations", np.Inf)
        self.clip_actions = config["env"].get("clipActions", np.Inf)

    @abc.abstractmethod
    def allocate_buffers(self):
        """Create torch buffers for observations, rewards, actions dones and any additional data."""

    @abc.abstractmethod
    def step(self, actions: torch.Tensor) -> Tuple[Dict[str, torch.Tensor], torch.Tensor, torch.Tensor, Dict[str, Any]]:
        """Step the physics of the environment.

        Args:
            actions: actions to apply
        Returns:
            Observations, rewards, resets, info
            Observations are dict of observations (currently only one member called 'obs')
        """

    @abc.abstractmethod
    def reset(self) -> Dict[str, torch.Tensor]:
        """Reset the environment.

        Returns:
            Observation dictionary
        """

    @abc.abstractmethod
    def reset_idx(self, env_ids: torch.Tensor):
        """Reset environments having the provided indices.

        Args:
            env_ids: environments to reset
        """

    @property
    def observation_space(self) -> gym.Space:
        """Get the environment's observation space."""
        return self.obs_space

    @property
    def action_space(self) -> gym.Space:
        """Get the environment's action space."""
        return self.act_space

    @property
    def num_envs(self) -> int:
        """Get the number of environments."""
        return self.num_environments

    @property
    def num_acts(self) -> int:
        """Get the number of actions in the environment."""
        return self.num_actions

    @property
    def num_obs(self) -> int:
        """Get the number of observations in the environment."""
        return self.num_observations
class VecTask(Env):
    # Metadata consumed by gym-style wrappers/recorders.
    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 24}

    def __init__(self, config, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture: bool = False, force_render: bool = False):
        """Initialise the `VecTask`.

        Args:
            config: config dictionary for the environment.
            sim_device: the device to simulate physics on. eg. 'cuda:0' or 'cpu'
            graphics_device_id: the device ID to render with.
            headless: Set to False to disable viewer rendering.
            virtual_screen_capture: Set to True to allow the users get captured screen in RGB array via `env.render(mode='rgb_array')`.
            force_render: Set to True to always force rendering in the steps (if the `control_freq_inv` is greater than 1 we suggest stting this arg to True)
        """
        super().__init__(config, rl_device, sim_device, graphics_device_id, headless)

        self.virtual_screen_capture = virtual_screen_capture
        self.virtual_display = None
        if self.virtual_screen_capture:
            # Imported lazily so pyvirtualdisplay is only required when capture is on.
            from pyvirtualdisplay.smartdisplay import SmartDisplay
            self.virtual_display = SmartDisplay(size=SCREEN_CAPTURE_RESOLUTION)
            self.virtual_display.start()
        self.force_render = force_render

        # NOTE(review): `self.cfg` is read here but not assigned in the visible
        # Env.__init__ — presumably subclasses set self.cfg before calling
        # super().__init__ (MA_Ant_Battle does); verify for other subclasses.
        self.sim_params = self.__parse_sim_params(self.cfg["physics_engine"], self.cfg["sim"])
        if self.cfg["physics_engine"] == "physx":
            self.physics_engine = gymapi.SIM_PHYSX
        elif self.cfg["physics_engine"] == "flex":
            self.physics_engine = gymapi.SIM_FLEX
        else:
            msg = f"Invalid physics engine backend: {self.cfg['physics_engine']}"
            raise ValueError(msg)

        # optimization flags for pytorch JIT
        torch._C._jit_set_profiling_mode(False)
        torch._C._jit_set_profiling_executor(False)

        self.gym = gymapi.acquire_gym()

        # Domain-randomization bookkeeping (see apply_randomizations below).
        self.first_randomization = True
        self.original_props = {}
        self.dr_randomizations = {}
        self.actor_params_generator = None
        self.extern_actor_params = {}
        self.last_step = -1
        self.last_rand_step = -1
        for env_id in range(self.num_envs):
            self.extern_actor_params[env_id] = None

        # create envs, sim and viewer
        self.sim_initialized = False
        # NOTE(review): called with no args, so subclasses are expected to
        # override create_sim() with a zero-argument version (MA_Ant_Battle does).
        self.create_sim()
        self.gym.prepare_sim(self.sim)
        self.sim_initialized = True

        self.set_viewer()
        self.allocate_buffers()

        self.obs_dict = {}

    def set_viewer(self):
        """Create the viewer."""

        # todo: read from config
        self.enable_viewer_sync = True
        self.viewer = None

        # if running with a viewer, set up keyboard shortcuts and camera
        if self.headless == False:
            # subscribe to keyboard shortcuts
            self.viewer = self.gym.create_viewer(
                self.sim, gymapi.CameraProperties())
            self.gym.subscribe_viewer_keyboard_event(
                self.viewer, gymapi.KEY_ESCAPE, "QUIT")
            self.gym.subscribe_viewer_keyboard_event(
                self.viewer, gymapi.KEY_V, "toggle_viewer_sync")

            # set the camera position based on up axis
            sim_params = self.gym.get_sim_params(self.sim)
            if sim_params.up_axis == gymapi.UP_AXIS_Z:
                cam_pos = gymapi.Vec3(20.0, 25.0, 3.0)
                cam_target = gymapi.Vec3(10.0, 15.0, 0.0)
            else:
                cam_pos = gymapi.Vec3(20.0, 3.0, 25.0)
                cam_target = gymapi.Vec3(10.0, 0.0, 15.0)

            self.gym.viewer_camera_look_at(
                self.viewer, None, cam_pos, cam_target)

    def allocate_buffers(self):
        """Allocate the observation, states, etc. buffers.

        These are what is used to set observations and states in the environment classes which
        inherit from this one, and are read in `step` and other related functions.
        """

        # allocate buffers
        self.obs_buf = torch.zeros(
            (self.num_envs, self.num_obs), device=self.device, dtype=torch.float)
        self.states_buf = torch.zeros(
            (self.num_envs, self.num_states), device=self.device, dtype=torch.float)
        self.rew_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.float)
        # Starts at all-ones so every env resets on the very first step.
        self.reset_buf = torch.ones(
            self.num_envs, device=self.device, dtype=torch.long)
        self.timeout_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.progress_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.randomize_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.extras = {}

    def create_sim(self, compute_device: int, graphics_device: int, physics_engine, sim_params: gymapi.SimParams):
        """Create an Isaac Gym sim object.

        Args:
            compute_device: ID of compute device to use.
            graphics_device: ID of graphics device to use.
            physics_engine: physics engine to use (`gymapi.SIM_PHYSX` or `gymapi.SIM_FLEX`)
            sim_params: sim params to use.
        Returns:
            the Isaac Gym sim object.
        """
        sim = _create_sim_once(self.gym, compute_device, graphics_device, physics_engine, sim_params)
        if sim is None:
            print("*** Failed to create sim")
            quit()

        return sim

    def get_state(self):
        """Returns the state buffer of the environment (the privileged observations for asymmetric training)."""
        return torch.clamp(self.states_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)

    @abc.abstractmethod
    def pre_physics_step(self, actions: torch.Tensor):
        """Apply the actions to the environment (eg by setting torques, position targets).

        Args:
            actions: the actions to apply
        """

    @abc.abstractmethod
    def post_physics_step(self):
        """Compute reward and observations, reset any environments that require it."""

    def step(self, actions: torch.Tensor) -> Tuple[Dict[str, torch.Tensor], torch.Tensor, torch.Tensor, Dict[str, Any]]:
        """Step the physics of the environment.

        Args:
            actions: actions to apply
        Returns:
            Observations, rewards, resets, info
            Observations are dict of observations (currently only one member called 'obs')
        """

        # randomize actions
        if self.dr_randomizations.get('actions', None):
            actions = self.dr_randomizations['actions']['noise_lambda'](actions)

        action_tensor = torch.clamp(actions, -self.clip_actions, self.clip_actions)
        # apply actions
        self.pre_physics_step(action_tensor)

        # step physics and render each frame
        for i in range(self.control_freq_inv):
            if self.force_render:
                self.render()
            self.gym.simulate(self.sim)

        # to fix!
        if self.device == 'cpu':
            self.gym.fetch_results(self.sim, True)

        # compute observations, rewards, resets, ...
        self.post_physics_step()

        # fill time out buffer: set to 1 if we reached the max episode length AND the reset buffer is 1. Timeout == 1 makes sense only if the reset buffer is 1.
        # NOTE(review): `self.max_episode_length` is not set in this class —
        # presumably defined by subclasses; verify before using VecTask directly.
        self.timeout_buf = (self.progress_buf >= self.max_episode_length - 1) & (self.reset_buf != 0)

        # randomize observations
        if self.dr_randomizations.get('observations', None):
            self.obs_buf = self.dr_randomizations['observations']['noise_lambda'](self.obs_buf)

        self.extras["time_outs"] = self.timeout_buf.to(self.rl_device)

        self.obs_dict["obs"] = torch.clamp(self.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)

        # asymmetric actor-critic
        if self.num_states > 0:
            self.obs_dict["states"] = self.get_state()

        return self.obs_dict, self.rew_buf.to(self.rl_device), self.reset_buf.to(self.rl_device), self.extras

    def zero_actions(self) -> torch.Tensor:
        """Returns a buffer with zero actions.

        Returns:
            A buffer of zero torch actions
        """
        actions = torch.zeros([self.num_envs, self.num_actions], dtype=torch.float32, device=self.rl_device)

        return actions

    def reset_idx(self, env_idx):
        """Reset environment with indces in env_idx.
        Should be implemented in an environment class inherited from VecTask.
        """
        pass

    def reset(self):
        """Is called only once when environment starts to provide the first observations.
        Doesn't calculate observations. Actual reset and observation calculation need to be implemented by user.
        Returns:
            Observation dictionary
        """
        self.obs_dict["obs"] = torch.clamp(self.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)

        # asymmetric actor-critic
        if self.num_states > 0:
            self.obs_dict["states"] = self.get_state()

        return self.obs_dict

    def reset_done(self):
        """Reset the environment.
        Returns:
            Observation dictionary, indices of environments being reset
        """
        done_env_ids = self.reset_buf.nonzero(as_tuple=False).flatten()
        if len(done_env_ids) > 0:
            self.reset_idx(done_env_ids)

        self.obs_dict["obs"] = torch.clamp(self.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)

        # asymmetric actor-critic
        if self.num_states > 0:
            self.obs_dict["states"] = self.get_state()

        return self.obs_dict, done_env_ids

    def render(self, mode="rgb_array"):
        """Draw the frame to the viewer, and check for keyboard events."""
        if self.viewer:
            # check for window closed
            if self.gym.query_viewer_has_closed(self.viewer):
                sys.exit()

            # check for keyboard events
            for evt in self.gym.query_viewer_action_events(self.viewer):
                if evt.action == "QUIT" and evt.value > 0:
                    sys.exit()
                elif evt.action == "toggle_viewer_sync" and evt.value > 0:
                    self.enable_viewer_sync = not self.enable_viewer_sync

            # fetch results
            if self.device != 'cpu':
                self.gym.fetch_results(self.sim, True)

            # step graphics
            if self.enable_viewer_sync:
                self.gym.step_graphics(self.sim)
                self.gym.draw_viewer(self.viewer, self.sim, True)

                # Wait for dt to elapse in real time.
                # This synchronizes the physics simulation with the rendering rate.
                self.gym.sync_frame_time(self.sim)
            else:
                # Keep processing window events even when sync is toggled off.
                self.gym.poll_viewer_events(self.viewer)

        if self.virtual_display and mode == "rgb_array":
            img = self.virtual_display.grab()
            return np.array(img)

    def __parse_sim_params(self, physics_engine: str, config_sim: Dict[str, Any]) -> gymapi.SimParams:
        """Parse the config dictionary for physics stepping settings.

        Args:
            physics_engine: which physics engine to use. "physx" or "flex"
            config_sim: dict of sim configuration parameters
        Returns
            IsaacGym SimParams object with updated settings.
        """
        sim_params = gymapi.SimParams()

        # check correct up-axis
        if config_sim["up_axis"] not in ["z", "y"]:
            msg = f"Invalid physics up-axis: {config_sim['up_axis']}"
            print(msg)
            raise ValueError(msg)

        # assign general sim parameters
        sim_params.dt = config_sim["dt"]
        sim_params.num_client_threads = config_sim.get("num_client_threads", 0)
        sim_params.use_gpu_pipeline = config_sim["use_gpu_pipeline"]
        sim_params.substeps = config_sim.get("substeps", 2)

        # assign up-axis
        if config_sim["up_axis"] == "z":
            sim_params.up_axis = gymapi.UP_AXIS_Z
        else:
            sim_params.up_axis = gymapi.UP_AXIS_Y

        # assign gravity
        sim_params.gravity = gymapi.Vec3(*config_sim["gravity"])

        # configure physics parameters
        if physics_engine == "physx":
            # set the parameters
            if "physx" in config_sim:
                for opt in config_sim["physx"].keys():
                    if opt == "contact_collection":
                        # This one is an enum, not a plain scalar.
                        setattr(sim_params.physx, opt, gymapi.ContactCollection(config_sim["physx"][opt]))
                    else:
                        setattr(sim_params.physx, opt, config_sim["physx"][opt])
        else:
            # set the parameters
            if "flex" in config_sim:
                for opt in config_sim["flex"].keys():
                    setattr(sim_params.flex, opt, config_sim["flex"][opt])

        # return the configured params
        return sim_params

    """
    Domain Randomization methods
    """

    def get_actor_params_info(self, dr_params: Dict[str, Any], env):
        """Generate a flat array of actor params, their names and ranges.

        Returns:
            The array
        """

        if "actor_params" not in dr_params:
            return None
        params = []
        names = []
        lows = []
        highs = []
        param_getters_map = get_property_getter_map(self.gym)
        for actor, actor_properties in dr_params["actor_params"].items():
            handle = self.gym.find_actor_handle(env, actor)
            for prop_name, prop_attrs in actor_properties.items():
                if prop_name == 'color':
                    continue  # this is set randomly
                props = param_getters_map[prop_name](env, handle)
                if not isinstance(props, list):
                    props = [props]
                for prop_idx, prop in enumerate(props):
                    for attr, attr_randomization_params in prop_attrs.items():
                        name = prop_name+'_' + str(prop_idx) + '_'+attr
                        lo_hi = attr_randomization_params['range']
                        distr = attr_randomization_params['distribution']
                        # Only uniform distributions have a bounded range.
                        if 'uniform' not in distr:
                            lo_hi = (-1.0*float('Inf'), float('Inf'))
                        if isinstance(prop, np.ndarray):
                            # presumably a structured array keyed by attr — TODO confirm
                            for attr_idx in range(prop[attr].shape[0]):
                                params.append(prop[attr][attr_idx])
                                names.append(name+'_'+str(attr_idx))
                                lows.append(lo_hi[0])
                                highs.append(lo_hi[1])
                        else:
                            params.append(getattr(prop, attr))
                            names.append(name)
                            lows.append(lo_hi[0])
                            highs.append(lo_hi[1])
        return params, names, lows, highs

    def apply_randomizations(self, dr_params):
        """Apply domain randomizations to the environment.

        Note that currently we can only apply randomizations only on resets, due to current PhysX limitations

        Args:
            dr_params: parameters for domain randomization to use.
        """

        # If we don't have a randomization frequency, randomize every step
        rand_freq = dr_params.get("frequency", 1)

        # First, determine what to randomize:
        #   - non-environment parameters when > frequency steps have passed since the last non-environment
        #   - physical environments in the reset buffer, which have exceeded the randomization frequency threshold
        #   - on the first call, randomize everything
        self.last_step = self.gym.get_frame_count(self.sim)
        if self.first_randomization:
            do_nonenv_randomize = True
            env_ids = list(range(self.num_envs))
        else:
            do_nonenv_randomize = (self.last_step - self.last_rand_step) >= rand_freq
            rand_envs = torch.where(self.randomize_buf >= rand_freq, torch.ones_like(self.randomize_buf),
                                    torch.zeros_like(self.randomize_buf))
            rand_envs = torch.logical_and(rand_envs, self.reset_buf)
            env_ids = torch.nonzero(rand_envs, as_tuple=False).squeeze(-1).tolist()
            self.randomize_buf[rand_envs] = 0

        if do_nonenv_randomize:
            self.last_rand_step = self.last_step

        param_setters_map = get_property_setter_map(self.gym)
        param_setter_defaults_map = get_default_setter_args(self.gym)
        param_getters_map = get_property_getter_map(self.gym)

        # On first iteration, check the number of buckets
        if self.first_randomization:
            check_buckets(self.gym, self.envs, dr_params)

        for nonphysical_param in ["observations", "actions"]:
            if nonphysical_param in dr_params and do_nonenv_randomize:
                dist = dr_params[nonphysical_param]["distribution"]
                op_type = dr_params[nonphysical_param]["operation"]
                sched_type = dr_params[nonphysical_param]["schedule"] if "schedule" in dr_params[nonphysical_param] else None
                sched_step = dr_params[nonphysical_param]["schedule_steps"] if "schedule" in dr_params[nonphysical_param] else None
                op = operator.add if op_type == 'additive' else operator.mul

                # sched_scaling ramps the noise in over training steps.
                if sched_type == 'linear':
                    sched_scaling = 1.0 / sched_step * \
                        min(self.last_step, sched_step)
                elif sched_type == 'constant':
                    sched_scaling = 0 if self.last_step < sched_step else 1
                else:
                    sched_scaling = 1

                if dist == 'gaussian':
                    mu, var = dr_params[nonphysical_param]["range"]
                    mu_corr, var_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])

                    if op_type == 'additive':
                        mu *= sched_scaling
                        var *= sched_scaling
                        mu_corr *= sched_scaling
                        var_corr *= sched_scaling
                    elif op_type == 'scaling':
                        var = var * sched_scaling  # scale up var over time
                        mu = mu * sched_scaling + 1.0 * \
                            (1.0 - sched_scaling)  # linearly interpolate

                        var_corr = var_corr * sched_scaling  # scale up var over time
                        mu_corr = mu_corr * sched_scaling + 1.0 * \
                            (1.0 - sched_scaling)  # linearly interpolate

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        params = self.dr_randomizations[param_name]
                        # 'corr' is sampled once and reused: correlated noise
                        # stays fixed until the next randomization.
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * params['var_corr'] + params['mu_corr']
                        return op(
                            tensor, corr + torch.randn_like(tensor) * params['var'] + params['mu'])

                    self.dr_randomizations[nonphysical_param] = {'mu': mu, 'var': var, 'mu_corr': mu_corr, 'var_corr': var_corr, 'noise_lambda': noise_lambda}

                elif dist == 'uniform':
                    lo, hi = dr_params[nonphysical_param]["range"]
                    lo_corr, hi_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])

                    if op_type == 'additive':
                        lo *= sched_scaling
                        hi *= sched_scaling
                        lo_corr *= sched_scaling
                        hi_corr *= sched_scaling
                    elif op_type == 'scaling':
                        lo = lo * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi = hi * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        lo_corr = lo_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi_corr = hi_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        params = self.dr_randomizations[param_name]
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * (params['hi_corr'] - params['lo_corr']) + params['lo_corr']
                        return op(tensor, corr + torch.rand_like(tensor) * (params['hi'] - params['lo']) + params['lo'])

                    self.dr_randomizations[nonphysical_param] = {'lo': lo, 'hi': hi, 'lo_corr': lo_corr, 'hi_corr': hi_corr, 'noise_lambda': noise_lambda}

        if "sim_params" in dr_params and do_nonenv_randomize:
            prop_attrs = dr_params["sim_params"]
            prop = self.gym.get_sim_params(self.sim)

            if self.first_randomization:
                self.original_props["sim_params"] = {
                    attr: getattr(prop, attr) for attr in dir(prop)}

            for attr, attr_randomization_params in prop_attrs.items():
                apply_random_samples(
                    prop, self.original_props["sim_params"], attr, attr_randomization_params, self.last_step)

            self.gym.set_sim_params(self.sim, prop)

        # If self.actor_params_generator is initialized: use it to
        # sample actor simulation params. This gives users the
        # freedom to generate samples from arbitrary distributions,
        # e.g. use full-covariance distributions instead of the DR's
        # default of treating each simulation parameter independently.
        extern_offsets = {}
        if self.actor_params_generator is not None:
            for env_id in env_ids:
                self.extern_actor_params[env_id] = \
                    self.actor_params_generator.sample()
                extern_offsets[env_id] = 0

        for actor, actor_properties in dr_params["actor_params"].items():
            for env_id in env_ids:
                env = self.envs[env_id]
                handle = self.gym.find_actor_handle(env, actor)
                extern_sample = self.extern_actor_params[env_id]

                for prop_name, prop_attrs in actor_properties.items():
                    if prop_name == 'color':
                        num_bodies = self.gym.get_actor_rigid_body_count(
                            env, handle)
                        for n in range(num_bodies):
                            self.gym.set_rigid_body_color(env, handle, n, gymapi.MESH_VISUAL,
                                                          gymapi.Vec3(random.uniform(0, 1), random.uniform(0, 1),
                                                                      random.uniform(0, 1)))
                        continue

                    if prop_name == 'scale':
                        setup_only = prop_attrs.get('setup_only', False)
                        if (setup_only and not self.sim_initialized) or not setup_only:
                            attr_randomization_params = prop_attrs
                            sample = generate_random_samples(attr_randomization_params, 1,
                                                             self.last_step, None)
                            og_scale = 1
                            if attr_randomization_params['operation'] == 'scaling':
                                new_scale = og_scale * sample
                            elif attr_randomization_params['operation'] == 'additive':
                                new_scale = og_scale + sample
                            self.gym.set_actor_scale(env, handle, new_scale)
                        continue

                    prop = param_getters_map[prop_name](env, handle)
                    set_random_properties = True

                    if isinstance(prop, list):
                        # Per-body/per-shape properties: snapshot originals once,
                        # then randomize each element relative to its original.
                        if self.first_randomization:
                            self.original_props[prop_name] = [
                                {attr: getattr(p, attr) for attr in dir(p)} for p in prop]
                        for p, og_p in zip(prop, self.original_props[prop_name]):
                            for attr, attr_randomization_params in prop_attrs.items():
                                setup_only = attr_randomization_params.get('setup_only', False)
                                if (setup_only and not self.sim_initialized) or not setup_only:
                                    smpl = None
                                    if self.actor_params_generator is not None:
                                        # NOTE(review): get_attr_val_from_sample is neither
                                        # imported nor defined in the visible part of this
                                        # file — verify it exists, else this path raises
                                        # NameError when actor_params_generator is set.
                                        smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                            extern_sample, extern_offsets[env_id], p, attr)
                                    apply_random_samples(
                                        p, og_p, attr, attr_randomization_params,
                                        self.last_step, smpl)
                                else:
                                    set_random_properties = False
                    else:
                        # Single property object (e.g. rigid body props for the actor).
                        if self.first_randomization:
                            self.original_props[prop_name] = deepcopy(prop)
                        for attr, attr_randomization_params in prop_attrs.items():
                            setup_only = attr_randomization_params.get('setup_only', False)
                            if (setup_only and not self.sim_initialized) or not setup_only:
                                smpl = None
                                if self.actor_params_generator is not None:
                                    smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                        extern_sample, extern_offsets[env_id], prop, attr)
                                apply_random_samples(
                                    prop, self.original_props[prop_name], attr,
                                    attr_randomization_params, self.last_step, smpl)
                            else:
                                set_random_properties = False

                    if set_random_properties:
                        setter = param_setters_map[prop_name]
                        default_args = param_setter_defaults_map[prop_name]
                        setter(env, handle, prop, *default_args)

        if self.actor_params_generator is not None:
            for env_id in env_ids:  # check that we used all dims in sample
                if extern_offsets[env_id] > 0:
                    extern_sample = self.extern_actor_params[env_id]
                    if extern_offsets[env_id] != extern_sample.shape[0]:
                        print('env_id', env_id,
                              'extern_offset', extern_offsets[env_id],
                              'vs extern_sample.shape', extern_sample.shape)
                        raise Exception("Invalid extern_sample size")

        self.first_randomization = False
self.first_randomization = False ================================================ FILE: timechamber/tasks/data/assets/mjcf/amp_humanoid_sword_shield.xml ================================================ ================================================ FILE: timechamber/tasks/data/models/llc_reallusion_sword_shield.pth ================================================ [File too large to display: 80.6 MB] ================================================ FILE: timechamber/tasks/data/motions/reallusion_sword_shield/README.txt ================================================ This motion data is provided courtesy of Reallusion, strictly for noncommercial use. The original motion data is available at: https://actorcore.reallusion.com/motion/pack/studio-mocap-sword-and-shield-stunts https://actorcore.reallusion.com/motion/pack/studio-mocap-sword-and-shield-moves ================================================ FILE: timechamber/tasks/data/motions/reallusion_sword_shield/dataset_reallusion_sword_shield.yaml ================================================ motions: - file: "RL_Avatar_Atk_2xCombo01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_2xCombo02_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_2xCombo03_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_2xCombo04_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_2xCombo05_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo02_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo03_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo04_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo05_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo06_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo07_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_4xCombo01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_4xCombo02_Motion.npy" weight: 0.00724638 - file: 
"RL_Avatar_Atk_4xCombo03_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_SlashDown_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_SlashLeft_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_SlashRight_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_SlashUp_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_Spin_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_Stab_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Counter_Atk01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Counter_Atk02_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Counter_Atk03_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Counter_Atk04_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Kill_2xCombo01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Kill_2xCombo02_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Kill_3xCombo01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Kill_3xCombo02_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Kill_4xCombo01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_Jump_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Atk_Kick_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Atk_ShieldCharge_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Atk_ShieldSwipe01_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Atk_ShieldSwipe02_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Counter_Atk05_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Standoff_Feint_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Dodge_Backward_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_RunBackward_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_WalkBackward01_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_WalkBackward02_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_Dodgle_Left_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_RunLeft_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_WalkLeft01_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_WalkLeft02_Motion.npy" weight: 0.01552795 - file: 
"RL_Avatar_Dodgle_Right_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_RunRight_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_WalkRight01_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_WalkRight02_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_RunForward_Motion.npy" weight: 0.02070393 - file: "RL_Avatar_WalkForward01_Motion.npy" weight: 0.02070393 - file: "RL_Avatar_WalkForward02_Motion.npy" weight: 0.02070393 - file: "RL_Avatar_Standoff_Circle_Motion.npy" weight: 0.06211180 - file: "RL_Avatar_TurnLeft90_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_TurnLeft180_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_TurnRight90_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_TurnRight180_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Fall_Backward_Motion.npy" weight: 0.00869565 - file: "RL_Avatar_Fall_Left_Motion.npy" weight: 0.00869565 - file: "RL_Avatar_Fall_Right_Motion.npy" weight: 0.00869565 - file: "RL_Avatar_Fall_SpinLeft_Motion.npy" weight: 0.00869565 - file: "RL_Avatar_Fall_SpinRight_Motion.npy" weight: 0.00869565 - file: "RL_Avatar_Idle_Alert(0)_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Idle_Alert_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Idle_Battle(0)_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Idle_Battle_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Idle_Ready(0)_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Idle_Ready_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Standoff_Swing_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Taunt_PoundChest_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Taunt_Roar_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Taunt_ShieldKnock_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Shield_BlockBackward_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Shield_BlockCrouch_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Shield_BlockDown_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Shield_BlockLeft_Motion.npy" weight: 0.00289855 - file: 
"RL_Avatar_Shield_BlockRight_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Shield_BlockUp_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryBackward01_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryBackward02_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryBackward03_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryBackward04_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryCrouch_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryDown_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryLeft_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryRight_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryUp_Motion.npy" weight: 0.00289855 ================================================ FILE: timechamber/tasks/ma_ant_battle.py ================================================ from typing import Tuple import os import torch from isaacgym import gymtorch from isaacgym.gymtorch import * from timechamber.utils.torch_jit_utils import * from .base.ma_vec_task import MA_VecTask class MA_Ant_Battle(MA_VecTask): def __init__(self, cfg, sim_device, rl_device, graphics_device_id, headless, virtual_screen_capture, force_render): self.extras = None self.cfg = cfg self.randomization_params = self.cfg["task"]["randomization_params"] self.randomize = self.cfg["task"]["randomize"] self.max_episode_length = self.cfg["env"]["episodeLength"] self.termination_height = self.cfg["env"]["terminationHeight"] self.plane_static_friction = self.cfg["env"]["plane"]["staticFriction"] self.plane_dynamic_friction = self.cfg["env"]["plane"]["dynamicFriction"] self.plane_restitution = self.cfg["env"]["plane"]["restitution"] self.action_scale = self.cfg["env"]["control"]["actionScale"] self.joints_at_limit_cost_scale = self.cfg["env"]["jointsAtLimitCost"] self.dof_vel_scale = self.cfg["env"]["dofVelocityScale"] self.ant_agents_state = [] self.win_reward_scale = 2000 self.move_to_op_reward_scale = 1. 
# NOTE(review): this span is the continuation of MA_Ant_Battle.__init__ — the
# `def __init__` line lies earlier in the file; indentation below assumes the
# enclosing class body. `_create_envs` at the end continues past this chunk.

        # Reward shaping coefficients (hard-coded here rather than read from cfg).
        self.stay_in_center_reward_scale = 0.2
        self.action_cost_scale = -0.000025
        self.push_scale = 1.
        self.dense_reward_scale = 1.0
        self.hp_decay_scale = 1.

        # PD controller gains from the task config.
        self.Kp = self.cfg["env"]["control"]["stiffness"]
        self.Kd = self.cfg["env"]["control"]["damping"]

        # 32 self-observations plus 27 per opponent agent.
        self.cfg["env"]["numObservations"] = 32 + 27 * (self.cfg["env"].get("numAgents", 1) - 1)
        self.cfg["env"]["numActions"] = 8

        # Arena radius; shrinks by one unit per step over an episode.
        self.borderline_space = cfg["env"]["borderlineSpace"]
        self.borderline_space_unit = self.borderline_space / self.max_episode_length
        self.ant_body_colors = [gymapi.Vec3(*rgb_arr) for rgb_arr in self.cfg["env"]["color"]]

        super().__init__(config=self.cfg, sim_device=sim_device, rl_device=rl_device,
                         graphics_device_id=graphics_device_id, headless=headless)

        self.use_central_value = False
        self.obs_idxs = torch.eye(4, dtype=torch.float32, device=self.device)

        if self.viewer is not None:
            # Draw the circular arena border in every env and place the camera.
            for i, env in enumerate(self.envs):
                self._add_circle_borderline(env, self.borderline_space)
            cam_pos = gymapi.Vec3(15.0, 0.0, 3.4)
            cam_target = gymapi.Vec3(10.0, 0.0, 0.0)
            self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)

        # get gym GPU state tensors
        actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim)
        dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim)
        sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim)

        # presumably 4 force sensors (feet) per ant, 6 values each — TODO confirm
        sensors_per_env = 4
        self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, sensors_per_env * 6)

        self.gym.refresh_dof_state_tensor(self.sim)
        self.gym.refresh_actor_root_state_tensor(self.sim)

        # Aliased views into the simulator's root-state buffer; writes propagate.
        self.root_states = gymtorch.wrap_tensor(actor_root_state)
        print(f'root_states:{self.root_states.shape}')
        self.initial_root_states = self.root_states.clone()
        self.initial_root_states[:, 7:13] = 0  # set lin_vel and ang_vel to 0

        # create some wrapper tensors for different slices
        self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)
        print(f'dof:{self.dof_state.shape}')
        dof_state_shaped = self.dof_state.view(self.num_envs, -1, 2)
        # Per-agent views: actors are interleaved, so agent idx owns rows idx::num_agents.
        for idx in range(self.num_agents):
            ant_root_state = self.root_states[idx::self.num_agents]
            ant_dof_pos = dof_state_shaped[:, idx * self.num_dof:(idx + 1) * self.num_dof, 0]
            ant_dof_vel = dof_state_shaped[:, idx * self.num_dof:(idx + 1) * self.num_dof, 1]
            self.ant_agents_state.append((ant_root_state, ant_dof_pos, ant_dof_vel))

        # Clamp the zero pose into the DOF limits where zero is out of range.
        self.initial_dof_pos = torch.zeros_like(self.ant_agents_state[0][1], device=self.device, dtype=torch.float)
        zero_tensor = torch.tensor([0.0], device=self.device)
        self.initial_dof_pos = torch.where(self.dof_limits_lower > zero_tensor, self.dof_limits_lower,
                                           torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper,
                                                       self.initial_dof_pos))
        self.initial_dof_vel = torch.zeros_like(self.ant_agents_state[0][2], device=self.device, dtype=torch.float)

        self.dt = self.cfg["sim"]["dt"]

        torques = self.gym.acquire_dof_force_tensor(self.sim)
        self.torques = gymtorch.wrap_tensor(torques).view(self.num_envs, self.num_agents * self.num_dof)

        # Per-actor world-frame basis vectors used by heading/orientation math.
        self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat(
            (self.num_agents * self.num_envs, 1))
        self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat(
            (self.num_agents * self.num_envs, 1))
        self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat(
            (self.num_agents * self.num_envs, 1))

    def allocate_buffers(self):
        # Overrides VecTask.allocate_buffers: observations are per-agent
        # (num_agents * num_envs rows) while rewards/resets are per-env.
        self.obs_buf = torch.zeros((self.num_agents * self.num_envs, self.num_obs), device=self.device,
                                   dtype=torch.float)
        self.rew_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.float)
        self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long)
        self.timeout_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.progress_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.randomize_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        # Match-outcome bookkeeping surfaced to the self-play trainer.
        self.extras = {'ranks': torch.zeros((self.num_envs, self.num_agents), device=self.device, dtype=torch.long),
                       'win': torch.zeros((self.num_envs * (self.num_agents - 1),), device=self.device,
                                          dtype=torch.bool),
                       'lose': torch.zeros((self.num_envs * (self.num_agents - 1),), device=self.device,
                                           dtype=torch.bool),
                       'draw': torch.zeros((self.num_envs * (self.num_agents - 1),), device=self.device,
                                           dtype=torch.bool)}

    def create_sim(self):
        # Zero-argument override expected by VecTask.__init__'s self.create_sim() call.
        self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, 'z')
        self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)

        # Precompute the unit-circle line segments used to draw the arena border
        # (scaled by radius in _add_circle_borderline).
        lines = []
        borderline_height = 0.01
        for height in range(20):
            for angle in range(360):
                begin_point = [np.cos(np.radians(angle)), np.sin(np.radians(angle)), borderline_height * height]
                end_point = [np.cos(np.radians(angle + 1)), np.sin(np.radians(angle + 1)), borderline_height * height]
                lines.append(begin_point)
                lines.append(end_point)
        self.lines = np.array(lines, dtype=np.float32)

        self._create_ground_plane()
        print(f'num envs {self.num_envs} env spacing {self.cfg["env"]["envSpacing"]}')
        self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs)))

        # If randomizing, apply once immediately on startup before the fist sim step
        if self.randomize:
            self.apply_randomizations(self.randomization_params)

    def _add_circle_borderline(self, env, radius):
        # Scale the precomputed unit circle to `radius` and draw it in red.
        lines = self.lines * radius
        colors = np.array([[1, 0, 0]] * (len(lines) // 2), dtype=np.float32)
        self.gym.add_lines(self.viewer, env, len(lines) // 2, lines, colors)

    def _create_ground_plane(self):
        plane_params = gymapi.PlaneParams()
        plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
        plane_params.static_friction = self.plane_static_friction
        plane_params.dynamic_friction = self.plane_dynamic_friction
        self.gym.add_ground(self.sim, plane_params)

    def _create_envs(self, num_envs, spacing, num_per_row):
        # NOTE(review): this method continues beyond the visible chunk.
        lower = gymapi.Vec3(-spacing, -spacing, 0.0)
        upper = gymapi.Vec3(spacing, spacing, spacing)

        asset_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../assets')
asset_file = "mjcf/nv_ant.xml" if "asset" in self.cfg["env"]: asset_file = self.cfg["env"]["asset"].get("assetFileName", asset_file) asset_path = os.path.join(asset_root, asset_file) asset_root = os.path.dirname(asset_path) asset_file = os.path.basename(asset_path) asset_options = gymapi.AssetOptions() # Note - DOF mode is set in the MJCF file and loaded by Isaac Gym asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE asset_options.angular_damping = 0.0 ant_assets = [] for _ in range(self.num_agents): ant_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) ant_assets.append(ant_asset) dof_props = self.gym.get_asset_dof_properties(ant_assets[0]) self.num_dof = self.gym.get_asset_dof_count(ant_assets[0]) self.num_bodies = self.gym.get_asset_rigid_body_count(ant_assets[0]) for i in range(self.num_dof): dof_props['driveMode'][i] = gymapi.DOF_MODE_POS dof_props['stiffness'][i] = self.Kp dof_props['damping'][i] = self.Kd start_pose = gymapi.Transform() start_pose.p = gymapi.Vec3(-self.borderline_space + 1, -self.borderline_space + 1, 1.) 
self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w], device=self.device) self.torso_index = 0 self.num_bodies = self.gym.get_asset_rigid_body_count(ant_assets[0]) body_names = [self.gym.get_asset_rigid_body_name(ant_assets[0], i) for i in range(self.num_bodies)] extremity_names = [s for s in body_names if "foot" in s] self.extremities_index = torch.zeros(len(extremity_names), dtype=torch.long, device=self.device) print(body_names, extremity_names, self.extremities_index) # create force sensors attached to the "feet" extremity_indices = [self.gym.find_asset_rigid_body_index(ant_assets[0], name) for name in extremity_names] sensor_pose = gymapi.Transform() for body_idx in extremity_indices: self.gym.create_asset_force_sensor(ant_assets[0], body_idx, sensor_pose) self.ant_handles = [] self.actor_indices = [] self.envs = [] self.dof_limits_lower = [] self.dof_limits_upper = [] for i in range(self.num_envs): # create env instance env_ptr = self.gym.create_env( self.sim, lower, upper, num_per_row ) # create actor instance for j in range(self.num_agents): ant_handle = self.gym.create_actor(env_ptr, ant_assets[j], start_pose, "ant_" + str(j), i, -1, 0) actor_index = self.gym.get_actor_index(env_ptr, ant_handle, gymapi.DOMAIN_SIM) self.gym.set_actor_dof_properties(env_ptr, ant_handle, dof_props) self.actor_indices.append(actor_index) self.gym.enable_actor_dof_force_sensors(env_ptr, ant_handle) self.ant_handles.append(ant_handle) for k in range(self.num_bodies): self.gym.set_rigid_body_color( env_ptr, ant_handle, k, gymapi.MESH_VISUAL, self.ant_body_colors[j]) self.envs.append(env_ptr) dof_prop = self.gym.get_actor_dof_properties(self.envs[0], self.ant_handles[0]) for j in range(self.num_dof): if dof_prop['lower'][j] > dof_prop['upper'][j]: self.dof_limits_lower.append(dof_prop['upper'][j]) self.dof_limits_upper.append(dof_prop['lower'][j]) else: self.dof_limits_lower.append(dof_prop['lower'][j]) 
self.dof_limits_upper.append(dof_prop['upper'][j]) self.dof_limits_lower = to_torch(self.dof_limits_lower, device=self.device) self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device) self.actor_indices = to_torch(self.actor_indices, device=self.device).to(dtype=torch.int32) for i in range(len(extremity_names)): self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0], extremity_names[i]) def compute_reward(self, actions): self.rew_buf[:], self.reset_buf[:], self.extras['ranks'][:], self.extras['win'], self.extras['lose'], \ self.extras[ 'draw'] = compute_ant_reward( self.obs_buf, self.reset_buf, self.progress_buf, self.torques, self.extras['ranks'], self.termination_height, self.max_episode_length, self.borderline_space, self.borderline_space_unit, self.win_reward_scale, self.stay_in_center_reward_scale, self.action_cost_scale, self.push_scale, self.joints_at_limit_cost_scale, self.dense_reward_scale, self.dt, self.num_agents ) def compute_observations(self): self.gym.refresh_dof_state_tensor(self.sim) self.gym.refresh_actor_root_state_tensor(self.sim) self.gym.refresh_force_sensor_tensor(self.sim) self.gym.refresh_dof_force_tensor(self.sim) for agent_idx in range(self.num_agents): self.obs_buf[agent_idx * self.num_envs:(agent_idx + 1) * self.num_envs, :] = compute_ant_observations( self.ant_agents_state, self.progress_buf, self.dof_limits_lower, self.dof_limits_upper, self.dof_vel_scale, self.termination_height, self.borderline_space_unit, self.borderline_space, self.num_agents, agent_idx, ) def reset_idx(self, env_ids): # print('reset.....', env_ids) # Randomization can happen only at reset time, since it can reset actor positions on GPU if self.randomize: self.apply_randomizations(self.randomization_params) positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=self.device) velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=self.device) for agent_idx 
in range(self.num_agents): root_state, dof_pos, dof_vel = self.ant_agents_state[agent_idx] dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower, self.dof_limits_upper) dof_vel[env_ids] = velocities agent_env_ids = expand_env_ids(env_ids, self.num_agents) env_ids_int32 = self.actor_indices[agent_env_ids] rand_angle = torch.rand((len(env_ids),), device=self.device) * torch.pi * 2 # generate angle in 0-360 rand_pos = (self.borderline_space * torch.ones((len(agent_env_ids), 2), device=self.device) - torch.rand((len(agent_env_ids), 2), device=self.device)) unit_angle = 2 * torch.pi / self.num_agents for agent_idx in range(self.num_agents): rand_pos[agent_idx::self.num_agents, 0] *= torch.cos(rand_angle + agent_idx * unit_angle) rand_pos[agent_idx::self.num_agents, 1] *= torch.sin(rand_angle + agent_idx * unit_angle) rand_floats = torch_rand_float(-1.0, 1.0, (len(agent_env_ids), 1), device=self.device) rand_rotation = quat_from_angle_axis(rand_floats[:, 0] * np.pi, self.z_unit_tensor[agent_env_ids]) self.root_states[agent_env_ids] = self.initial_root_states[agent_env_ids] self.root_states[agent_env_ids, :2] = rand_pos self.root_states[agent_env_ids, 3:7] = rand_rotation self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self.root_states), gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) self.gym.set_dof_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self.dof_state), gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) self.progress_buf[env_ids] = 0 self.reset_buf[env_ids] = 0 self.extras['ranks'][env_ids] = 0 def pre_physics_step(self, actions): # actions.shape = [num_envs * num_agents, num_actions], stacked as followed: # {[(agent1_act_1, agent1_act2)|(agent2_act1, agent2_act2)|...]_(env0), # [(agent1_act_1, agent1_act2)|(agent2_act1, agent2_act2)|...]_(env1), # ... 
} self.actions = torch.tensor([], device=self.device) for agent_idx in range(self.num_agents): self.actions = torch.cat((self.actions, actions[agent_idx * self.num_envs:(agent_idx + 1) * self.num_envs]), dim=-1) tmp_actions = self.extras['ranks'].unsqueeze(-1).repeat_interleave(self.num_actions, dim=-1).view(self.num_envs, self.num_actions * self.num_agents) zero_actions = torch.zeros_like(tmp_actions, dtype=torch.float) self.actions = torch.where(tmp_actions > 0, zero_actions, self.actions) # reshape [num_envs * num_agents, num_actions] to [num_envs, num_agents * num_actions] print(f'action_size{ targets = self.actions self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(targets)) def post_physics_step(self): self.progress_buf += 1 self.randomize_buf += 1 resets = self.reset_buf.reshape(self.num_envs, 1).sum(dim=1) # print(resets) env_ids = (resets == 1).nonzero(as_tuple=False).flatten() if len(env_ids) > 0: self.reset_idx(env_ids) self.compute_observations() self.compute_reward(self.actions) if self.viewer is not None: self.gym.clear_lines(self.viewer) for i, env in enumerate(self.envs): self._add_circle_borderline(env, self.borderline_space - self.borderline_space_unit * self.progress_buf[ i].item()) def get_number_of_agents(self): # only train 1 agent return 1 def zero_actions(self) -> torch.Tensor: """Returns a buffer with zero actions. 
Returns: A buffer of zero torch actions """ actions = torch.zeros([self.num_envs * self.num_agents, self.num_actions], dtype=torch.float32, device=self.rl_device) self.extras['win'] = self.extras['lose'] = self.extras['draw'] = 0 return actions def clear_count(self): self.dense_reward_scale *= 0.9 self.extras['ranks'] = torch.zeros((self.num_agents, self.num_agents), device=self.device, dtype=torch.float) ##################################################################### ###=========================jit functions=========================### ##################################################################### @torch.jit.script def expand_env_ids(env_ids, n_agents): # type: (Tensor, int) -> Tensor device = env_ids.device # print(f'nanget:{n_agents}') agent_env_ids = torch.zeros((n_agents * len(env_ids)), device=device, dtype=torch.long) for idx in range(n_agents): agent_env_ids[idx::n_agents] = env_ids * n_agents + idx return agent_env_ids @torch.jit.script def compute_ant_reward( obs_buf, reset_buf, progress_buf, torques, now_rank, termination_height, max_episode_length, borderline_space, borderline_space_unit, win_reward_scale, stay_in_center_reward_scale, action_cost_scale, push_scale, joints_at_limit_cost_scale, dense_reward_scale, dt, num_agents ): # type: (Tensor, Tensor, Tensor,Tensor,Tensor,float,float,float,float,float,float,float,float,float,float,float,int) -> Tuple[Tensor, Tensor,Tensor,Tensor,Tensor,Tensor] obs = obs_buf.view(num_agents, -1, obs_buf.shape[1]) nxt_rank_val = num_agents - torch.count_nonzero(now_rank, dim=-1).view(-1, 1).repeat_interleave(num_agents, dim=-1) is_out = torch.sum(torch.square(obs[:, :, 0:2]), dim=-1) >= \ (borderline_space - progress_buf * borderline_space_unit).square() nxt_rank = torch.where((torch.transpose(is_out, 0, 1) > 0) & (now_rank == 0), nxt_rank_val, now_rank) # reset agents tmp_ones = torch.ones_like(reset_buf) reset = torch.where(is_out[0, :], tmp_ones, reset_buf) reset = torch.where(progress_buf >= 
max_episode_length - 1, tmp_ones, reset) reset = torch.where(torch.min(is_out[1:], dim=0).values, tmp_ones, reset) tmp_reset = reset.view(-1, 1).repeat_interleave(num_agents, dim=-1) nxt_rank = torch.where((tmp_reset == 1) & (nxt_rank == 0), nxt_rank_val - 1, nxt_rank) # compute metric logic tmp_reset = reset.view(1, -1).repeat_interleave(num_agents - 1, dim=0) tmp_zeros = torch.zeros_like(is_out[1:], dtype=torch.bool) wins = torch.ones_like(is_out[1:], dtype=torch.bool) loses = torch.ones_like(is_out[1:], dtype=torch.bool) draws = (progress_buf >= max_episode_length - 1).view(1, -1).repeat_interleave(num_agents - 1, dim=0) wins = torch.where(is_out[1:], wins & (tmp_reset == 1), tmp_zeros) draws = torch.where(is_out[1:] == 0, draws & (tmp_reset == 1), tmp_zeros) loses = torch.where(is_out[1:] == 0, loses & (tmp_reset == 1) & (draws == 0), tmp_zeros) sparse_reward = 1.0 * reset reward_per_rank = 2 * win_reward_scale / (num_agents - 1) sparse_reward = sparse_reward * (win_reward_scale - (nxt_rank[:, 0] - 1) * reward_per_rank) stay_in_center_reward = stay_in_center_reward_scale * torch.exp(-torch.linalg.norm(obs[0, :, :2], dim=-1)) dof_at_limit_cost = torch.sum(obs[0, :, 13:21] > 0.99, dim=-1) * joints_at_limit_cost_scale action_cost_penalty = torch.sum(torch.square(torques), dim=1) * action_cost_scale # print("torques:", torques[0, 2]) not_move_penalty = torch.exp(-torch.sum(torch.abs(torques), dim=1)) # print(f'action:...{action_cost_penalty.shape}') dense_reward = dof_at_limit_cost + action_cost_penalty + not_move_penalty + stay_in_center_reward total_reward = sparse_reward + dense_reward * dense_reward_scale return total_reward, reset, nxt_rank, wins.flatten(), loses.flatten(), draws.flatten() @torch.jit.script def compute_ant_observations( ant_agents_state, progress_buf, dof_limits_lower, dof_limits_upper, dof_vel_scale, termination_height, borderline_space_unit, borderline_space, num_agents, agent_idx, ): # type: 
(List[Tuple[Tensor,Tensor,Tensor]],Tensor,Tensor,Tensor,float,float,float,float,int,int)->Tensor # tot length:13+8+8+1+1+(num_agents-1)*(7+2+8+8+1) self_root_state, self_dof_pos, self_dof_vel = ant_agents_state[agent_idx] dof_pos_scaled = unscale(self_dof_pos, dof_limits_lower, dof_limits_upper) now_border_space = (borderline_space - progress_buf * borderline_space_unit).unsqueeze(-1) obs = torch.cat((self_root_state[:, :13], dof_pos_scaled, self_dof_vel * dof_vel_scale, now_border_space - torch.sqrt(torch.sum(self_root_state[:, :2].square(), dim=-1)).unsqueeze(-1), # dis to border now_border_space, torch.unsqueeze(self_root_state[:, 2] < termination_height, -1)), dim=-1) for op_idx in range(num_agents): if op_idx == agent_idx: continue op_root_state, op_dof_pos, op_dof_vel = ant_agents_state[op_idx] dof_pos_scaled = unscale(op_dof_pos, dof_limits_lower, dof_limits_upper) obs = torch.cat((obs, op_root_state[:, :7], self_root_state[:, :2] - op_root_state[:, :2], dof_pos_scaled, op_dof_vel * dof_vel_scale, now_border_space - torch.sqrt(torch.sum(op_root_state[:, :2].square(), dim=-1)).unsqueeze(-1), torch.unsqueeze(op_root_state[:, 2] < termination_height, -1)), dim=-1) # print(obs.shape) return obs @torch.jit.script def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor): return quat_mul(quat_from_angle_axis(rand0 * np.pi, x_unit_tensor), quat_from_angle_axis(rand1 * np.pi, y_unit_tensor)) ================================================ FILE: timechamber/tasks/ma_ant_sumo.py ================================================ from typing import Tuple import numpy as np import os import math import torch import random from isaacgym import gymtorch from isaacgym import gymapi from isaacgym.gymtorch import * # from torch.tensor import Tensor from timechamber.utils.torch_jit_utils import * from .base.vec_task import VecTask from .base.ma_vec_task import MA_VecTask # todo critic_state full obs class MA_Ant_Sumo(MA_VecTask): def __init__(self, cfg, sim_device, 
rl_device, graphics_device_id, headless, virtual_screen_capture, force_render): self.cfg = cfg self.randomization_params = self.cfg["task"]["randomization_params"] self.randomize = self.cfg["task"]["randomize"] self.max_episode_length = self.cfg["env"]["episodeLength"] self.termination_height = self.cfg["env"]["terminationHeight"] self.borderline_space = cfg["env"]["borderlineSpace"] self.plane_static_friction = self.cfg["env"]["plane"]["staticFriction"] self.plane_dynamic_friction = self.cfg["env"]["plane"]["dynamicFriction"] self.plane_restitution = self.cfg["env"]["plane"]["restitution"] self.action_scale = self.cfg["env"]["control"]["actionScale"] self.joints_at_limit_cost_scale = self.cfg["env"]["jointsAtLimitCost"] self.dof_vel_scale = self.cfg["env"]["dofVelocityScale"] self.draw_penalty_scale = -1000 self.win_reward_scale = 2000 self.move_to_op_reward_scale = 1. self.stay_in_center_reward_scale = 0.2 self.action_cost_scale = -0.000025 self.push_scale = 1. self.dense_reward_scale = 1. self.hp_decay_scale = 1. 
self.Kp = self.cfg["env"]["control"]["stiffness"] self.Kd = self.cfg["env"]["control"]["damping"] # see func: compute_ant_observations() for details # self.cfg["env"]["numObservations"] = 48 # dof pos(2) + dof vel(2) + dof action(2) + feet force sensor(force&torque, 6) self.cfg["env"][ "numObservations"] = 40 self.cfg["env"]["numActions"] = 8 self.cfg["env"]["numAgents"] = 2 self.use_central_value = False super().__init__(config=self.cfg, sim_device=sim_device, rl_device=rl_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render) if self.viewer is not None: for env in self.envs: self._add_circle_borderline(env) cam_pos = gymapi.Vec3(15.0, 0.0, 3.0) cam_target = gymapi.Vec3(10.0, 0.0, 0.0) self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target) # get gym GPU state tensors actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim) dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim) sensors_per_env = 4 self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs * self.num_agents, sensors_per_env * 6) self.gym.refresh_dof_state_tensor(self.sim) self.gym.refresh_actor_root_state_tensor(self.sim) self.root_states = gymtorch.wrap_tensor(actor_root_state) print(f'root_states:{self.root_states.shape}') self.initial_root_states = self.root_states.clone() self.initial_root_states[:, 7:13] = 0 # set lin_vel and ang_vel to 0 # create some wrapper tensors for different slices self.dof_state = gymtorch.wrap_tensor(dof_state_tensor) print(f"dof state shape: {self.dof_state.shape}") self.dof_pos = self.dof_state.view(self.num_envs, -1, 2)[:, :self.num_dof, 0] self.dof_pos_op = self.dof_state.view(self.num_envs, -1, 2)[:, self.num_dof:2 * self.num_dof, 0] self.dof_vel = self.dof_state.view(self.num_envs, -1, 2)[:, :self.num_dof, 1] self.dof_vel_op = self.dof_state.view(self.num_envs, 
-1, 2)[:, self.num_dof:2 * self.num_dof, 1] self.initial_dof_pos = torch.zeros_like(self.dof_pos, device=self.device, dtype=torch.float) zero_tensor = torch.tensor([0.0], device=self.device) self.initial_dof_pos = torch.where(self.dof_limits_lower > zero_tensor, self.dof_limits_lower, torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper, self.initial_dof_pos)) self.initial_dof_vel = torch.zeros_like(self.dof_vel, device=self.device, dtype=torch.float) self.dt = self.cfg["sim"]["dt"] torques = self.gym.acquire_dof_force_tensor(self.sim) self.torques = gymtorch.wrap_tensor(torques).view(self.num_envs, 2 * self.num_dof) self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1)) self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1)) self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1)) self.hp = torch.ones((self.num_envs,), device=self.device, dtype=torch.float32) * 100 self.hp_op = torch.ones((self.num_envs,), device=self.device, dtype=torch.float32) * 100 def allocate_buffers(self): self.obs_buf = torch.zeros((self.num_agents * self.num_envs, self.num_obs), device=self.device, dtype=torch.float) self.rew_buf = torch.zeros( self.num_envs, device=self.device, dtype=torch.float) self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long) self.timeout_buf = torch.zeros( self.num_envs, device=self.device, dtype=torch.long) self.progress_buf = torch.zeros( self.num_envs, device=self.device, dtype=torch.long) self.randomize_buf = torch.zeros( self.num_envs * self.num_agents, device=self.device, dtype=torch.long) self.extras = { 'win': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool), 'lose': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool), 'draw': torch.zeros(((self.num_agents - 1) * 
self.num_envs,), device=self.device, dtype=torch.bool)} def create_sim(self): self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, 'z') self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) self._create_ground_plane() print(f'num envs {self.num_envs} env spacing {self.cfg["env"]["envSpacing"]}') self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs))) # If randomizing, apply once immediately on startup before the fist sim step if self.randomize: self.apply_randomizations(self.randomization_params) def _add_circle_borderline(self, env): lines = [] borderline_height = 0.01 for height in range(20): for angle in range(360): begin_point = [np.cos(np.radians(angle)), np.sin(np.radians(angle)), borderline_height * height] end_point = [np.cos(np.radians(angle + 1)), np.sin(np.radians(angle + 1)), borderline_height * height] lines.append(begin_point) lines.append(end_point) lines = np.array(lines, dtype=np.float32) * self.borderline_space colors = np.array([[1, 0, 0]] * int(len(lines) / 2), dtype=np.float32) self.gym.add_lines(self.viewer, env, int(len(lines) / 2), lines, colors) def _create_ground_plane(self): plane_params = gymapi.PlaneParams() plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0) plane_params.static_friction = self.plane_static_friction plane_params.dynamic_friction = self.plane_dynamic_friction self.gym.add_ground(self.sim, plane_params) def _create_envs(self, num_envs, spacing, num_per_row): lower = gymapi.Vec3(-spacing, -spacing, 0.0) upper = gymapi.Vec3(spacing, spacing, spacing) asset_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../assets') asset_file = "mjcf/nv_ant.xml" if "asset" in self.cfg["env"]: asset_file = self.cfg["env"]["asset"].get("assetFileName", asset_file) asset_path = os.path.join(asset_root, asset_file) asset_root = os.path.dirname(asset_path) asset_file = os.path.basename(asset_path) asset_options = 
gymapi.AssetOptions() # Note - DOF mode is set in the MJCF file and loaded by Isaac Gym asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE asset_options.angular_damping = 0.0 ant_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) ant_asset_op = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) dof_props = self.gym.get_asset_dof_properties(ant_asset) self.num_dof = self.gym.get_asset_dof_count(ant_asset) self.num_bodies = self.gym.get_asset_rigid_body_count(ant_asset) # 9 = 4 x 2(front&back-end legs) + 1(torso) for i in range(self.num_dof): dof_props['driveMode'][i] = gymapi.DOF_MODE_POS dof_props['stiffness'][i] = self.Kp dof_props['damping'][i] = self.Kd box_pose = gymapi.Transform() box_pose.p = gymapi.Vec3(0, 0, 0) start_pose = gymapi.Transform() start_pose.p = gymapi.Vec3(-self.borderline_space + 1, -self.borderline_space + 1, 1.) start_pose_op = gymapi.Transform() start_pose_op.p = gymapi.Vec3(self.borderline_space - 1, self.borderline_space - 1, 1.) 
print(start_pose.p, start_pose_op.p) self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w], device=self.device) self.torso_index = 0 self.num_bodies = self.gym.get_asset_rigid_body_count(ant_asset) body_names = [self.gym.get_asset_rigid_body_name(ant_asset, i) for i in range(self.num_bodies)] extremity_names = [s for s in body_names if "foot" in s] self.extremities_index = torch.zeros(len(extremity_names), dtype=torch.long, device=self.device) # create force sensors attached to the "feet" extremity_indices = [self.gym.find_asset_rigid_body_index(ant_asset, name) for name in extremity_names] sensor_pose = gymapi.Transform() sensor_pose_op = gymapi.Transform() for body_idx in extremity_indices: self.gym.create_asset_force_sensor(ant_asset, body_idx, sensor_pose) self.gym.create_asset_force_sensor(ant_asset_op, body_idx, sensor_pose_op) self.ant_handles = [] self.actor_indices = [] self.actor_indices_op = [] self.actor_handles_op = [] self.envs = [] self.pos_before = torch.zeros(2, device=self.device) self.dof_limits_lower = [] self.dof_limits_upper = [] for i in range(self.num_envs): # create env instance env_ptr = self.gym.create_env( self.sim, lower, upper, num_per_row ) ant_handle = self.gym.create_actor(env_ptr, ant_asset, start_pose, "ant", i, -1, 0) actor_index = self.gym.get_actor_index(env_ptr, ant_handle, gymapi.DOMAIN_SIM) self.gym.set_actor_dof_properties(env_ptr, ant_handle, dof_props) self.actor_indices.append(actor_index) self.gym.enable_actor_dof_force_sensors(env_ptr, ant_handle) ant_handle_op = self.gym.create_actor(env_ptr, ant_asset_op, start_pose_op, "ant_op", i, -1, 0) actor_index_op = self.gym.get_actor_index(env_ptr, ant_handle_op, gymapi.DOMAIN_SIM) self.gym.set_actor_dof_properties(env_ptr, ant_handle_op, dof_props) self.actor_indices_op.append(actor_index_op) for j in range(self.num_bodies): self.gym.set_rigid_body_color( env_ptr, ant_handle, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.97, 0.38, 0.06)) 
self.gym.set_rigid_body_color( env_ptr, ant_handle_op, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.24, 0.38, 0.06)) self.envs.append(env_ptr) self.ant_handles.append(ant_handle) self.actor_handles_op.append(ant_handle_op) dof_prop = self.gym.get_actor_dof_properties(env_ptr, ant_handle) for j in range(self.num_dof): if dof_prop['lower'][j] > dof_prop['upper'][j]: self.dof_limits_lower.append(dof_prop['upper'][j]) self.dof_limits_upper.append(dof_prop['lower'][j]) else: self.dof_limits_lower.append(dof_prop['lower'][j]) self.dof_limits_upper.append(dof_prop['upper'][j]) self.dof_limits_lower = to_torch(self.dof_limits_lower, device=self.device) self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device) self.actor_indices = to_torch(self.actor_indices, dtype=torch.long, device=self.device) self.actor_indices_op = to_torch(self.actor_indices_op, dtype=torch.long, device=self.device) for i in range(len(extremity_names)): self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0], extremity_names[i]) def compute_reward(self, actions): self.rew_buf[:], self.reset_buf[:], self.hp[:], self.hp_op[:], \ self.extras['win'], self.extras['lose'], self.extras['draw'] = compute_ant_reward( self.obs_buf[:self.num_envs], self.obs_buf[self.num_envs:], self.reset_buf, self.progress_buf, self.pos_before, self.torques[:, :self.num_dof], self.hp, self.hp_op, self.termination_height, self.max_episode_length, self.borderline_space, self.draw_penalty_scale, self.win_reward_scale, self.move_to_op_reward_scale, self.stay_in_center_reward_scale, self.action_cost_scale, self.push_scale, self.joints_at_limit_cost_scale, self.dense_reward_scale, self.hp_decay_scale, self.dt, ) def compute_observations(self): self.gym.refresh_dof_state_tensor(self.sim) self.gym.refresh_actor_root_state_tensor(self.sim) self.gym.refresh_force_sensor_tensor(self.sim) self.gym.refresh_dof_force_tensor(self.sim) self.obs_buf[:self.num_envs] = \ 
compute_ant_observations(
        self.root_states[0::2], self.root_states[1::2], self.dof_pos, self.dof_vel,
        self.dof_limits_lower, self.dof_limits_upper, self.dof_vel_scale,
        self.termination_height)
        # Opponent observations: the same function with the two agents' roles
        # swapped (even root-state rows are the ego agent, odd rows the opponent).
        self.obs_buf[self.num_envs:] = compute_ant_observations(
            self.root_states[1::2], self.root_states[0::2], self.dof_pos_op, self.dof_vel_op,
            self.dof_limits_lower, self.dof_limits_upper, self.dof_vel_scale,
            self.termination_height)

    def reset_idx(self, env_ids):
        """Reset the given environments to randomized start states.

        Both agents of an env receive the same jittered DOF state; their root
        positions are placed at opposite angles around the arena center and
        their yaws are randomized.

        Args:
            env_ids: 1-D tensor of environment indices to reset.
        """
        # print('reset.....', env_ids)
        # Randomization can happen only at reset time, since it can reset actor positions on GPU
        if self.randomize:
            self.apply_randomizations(self.randomization_params)

        # Small uniform jitter around the initial DOF pose / zero velocity.
        positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=self.device)
        velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=self.device)

        self.dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions,
                                             self.dof_limits_lower, self.dof_limits_upper)
        self.dof_vel[env_ids] = velocities
        # NOTE(review): the opponent reuses the same sampled jitter tensors,
        # so both agents start from identical DOF states.
        self.dof_pos_op[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions,
                                                self.dof_limits_lower, self.dof_limits_upper)
        self.dof_vel_op[env_ids] = velocities

        env_ids_int32 = (torch.cat((self.actor_indices[env_ids],
                                    self.actor_indices_op[env_ids]))).to(dtype=torch.int32)
        agent_env_ids = expand_env_ids(env_ids, 2)

        # Spawn the two agents at a random radius (below borderline_space) and
        # at opposite angles (rand_angle vs. rand_angle + pi) around the center.
        rand_angle = torch.rand((len(env_ids),), device=self.device) * torch.pi * 2
        rand_pos = torch.ones((len(agent_env_ids), 2), device=self.device) * (
                self.borderline_space * torch.ones((len(agent_env_ids), 2), device=self.device) - torch.rand(
            (len(agent_env_ids), 2), device=self.device) * 2)
        rand_pos[0::2, 0] *= torch.cos(rand_angle)
        rand_pos[0::2, 1] *= torch.sin(rand_angle)
        rand_pos[1::2, 0] *= torch.cos(rand_angle + torch.pi)
        rand_pos[1::2, 1] *= torch.sin(rand_angle + torch.pi)

        # Independent random yaw (rotation about z) for each agent.
        rand_floats = torch_rand_float(-1.0, 1.0, (len(agent_env_ids), 3), device=self.device)
        rand_rotation = quat_from_angle_axis(rand_floats[:, 1] * np.pi, self.z_unit_tensor[agent_env_ids])
        rand_rotation2 = quat_from_angle_axis(rand_floats[:, 2] * np.pi, self.z_unit_tensor[agent_env_ids])
        self.root_states[agent_env_ids] = self.initial_root_states[agent_env_ids]
        self.root_states[agent_env_ids, :2] = rand_pos
        self.root_states[agent_env_ids[1::2], 3:7] = rand_rotation[1::2]
        self.root_states[agent_env_ids[0::2], 3:7] = rand_rotation2[0::2]

        # Push the new root/DOF states to the simulator for the reset actors only.
        self.gym.set_actor_root_state_tensor_indexed(self.sim,
                                                     gymtorch.unwrap_tensor(self.root_states),
                                                     gymtorch.unwrap_tensor(env_ids_int32),
                                                     len(env_ids_int32))
        self.gym.set_dof_state_tensor_indexed(self.sim,
                                              gymtorch.unwrap_tensor(self.dof_state),
                                              gymtorch.unwrap_tensor(env_ids_int32),
                                              len(env_ids_int32))
        self.pos_before = self.root_states[0::2, :2].clone()
        self.progress_buf[env_ids] = 0
        self.reset_buf[env_ids] = 0

    def pre_physics_step(self, actions):
        # actions.shape = [num_envs * num_agents, num_actions], stacked as followed:
        # {[(agent1_act_1, agent1_act2)|(agent2_act1, agent2_act2)|...]_(env0),
        #  [(agent1_act_1, agent1_act2)|(agent2_act1, agent2_act2)|...]_(env1),
        #  ... }
        self.actions = actions.clone().to(self.device)
        self.actions = torch.cat((self.actions[:self.num_envs], self.actions[self.num_envs:]),
                                 dim=-1)
        # reshape [num_envs * num_agents, num_actions] to [num_envs, num_agents * num_actions]
        targets = self.actions
        self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(targets))

    def post_physics_step(self):
        self.progress_buf += 1
        self.randomize_buf += 1

        self.compute_observations()
        self.compute_reward(self.actions)
        # Cache the ego root xy (first two observation entries) for the next
        # step's movement reward.
        self.pos_before = self.obs_buf[:self.num_envs, :2].clone()

    def get_number_of_agents(self):
        # train one agent with index 0
        return 1

    def zero_actions(self) -> torch.Tensor:
        """Returns a buffer with zero actions.
        Returns:
            A buffer of zero torch actions
        """
        actions = torch.zeros([self.num_envs * self.num_agents, self.num_actions],
                              dtype=torch.float32, device=self.rl_device)
        return actions

    def clear_count(self):
        # Decay the dense-reward weighting and clear the win/draw counters.
        self.dense_reward_scale *= 0.9
        self.extras['win'][:] = 0
        self.extras['draw'][:] = 0


#####################################################################
###=========================jit functions=========================###
#####################################################################


@torch.jit.script
def expand_env_ids(env_ids, n_agents):
    # type: (Tensor, int) -> Tensor
    # Map per-env indices to per-agent actor indices: env i -> i*n_agents + k
    # for k in [0, n_agents).
    device = env_ids.device
    agent_env_ids = torch.zeros((n_agents * len(env_ids)), device=device, dtype=torch.long)
    for idx in range(n_agents):
        agent_env_ids[idx::n_agents] = env_ids * n_agents + idx
    return agent_env_ids


@torch.jit.script
def compute_move_reward(
        pos,
        pos_before,
        target,
        dt,
        move_to_op_reward_scale
):
    # type: (Tensor,Tensor,Tensor,float,float) -> Tensor
    # Reward the component of the agent's velocity that points toward
    # `target` (the opponent); negative components are clipped to zero.
    move_vec = (pos - pos_before) / dt
    direction = target - pos_before
    direction = torch.div(direction, torch.linalg.norm(direction, dim=-1).view(-1, 1))
    s = torch.sum(move_vec * direction, dim=-1)
    return torch.maximum(s, torch.zeros_like(s)) * move_to_op_reward_scale


@torch.jit.script
def compute_ant_reward(
        obs_buf,
        obs_buf_op,
        reset_buf,
        progress_buf,
        pos_before,
        torques,
        hp,
        hp_op,
        termination_height,
        max_episode_length,
        borderline_space,
        draw_penalty_scale,
        win_reward_scale,
        move_to_op_reward_scale,
        stay_in_center_reward_scale,
        action_cost_scale,
        push_scale,
        joints_at_limit_cost_scale,
        dense_reward_scale,
        hp_decay_scale,
        dt,
):
    # type: (Tensor, Tensor, Tensor, Tensor,Tensor,Tensor,Tensor,Tensor,float, float,float, float,float,float,float,float,float,float,float,float,float) -> Tuple[Tensor, Tensor,Tensor,Tensor,Tensor,Tensor,Tensor]
    # An agent loses hp while its torso height (obs column 2) is below the
    # termination height.
    hp -= (obs_buf[:, 2] < termination_height) * hp_decay_scale
    hp_op -= (obs_buf_op[:, 2] < termination_height) * hp_decay_scale
    # Out of the arena: root xy outside the circle of radius borderline_space.
    is_out = torch.sum(torch.square(obs_buf[:, 0:2]), dim=-1) >=
borderline_space ** 2
    is_out_op = torch.sum(torch.square(obs_buf_op[:, 0:2]), dim=-1) >= borderline_space ** 2
    # An agent also counts as "out" once its hp is exhausted.
    is_out = is_out | (hp <= 0)
    is_out_op = is_out_op | (hp_op <= 0)
    # reset agents
    tmp_ones = torch.ones_like(reset_buf)
    reset = torch.where(is_out, tmp_ones, reset_buf)
    reset = torch.where(is_out_op, tmp_ones, reset)
    reset = torch.where(progress_buf >= max_episode_length - 1, tmp_ones, reset)
    # Refill both hp pools to 100 for environments being reset.
    hp = torch.where(reset > 0, tmp_ones * 100., hp)
    hp_op = torch.where(reset > 0, tmp_ones * 100., hp_op)

    # Sparse outcome terms: win when the opponent is out, symmetric lose
    # penalty, and a draw penalty at the episode-length cap.
    win_reward = win_reward_scale * is_out_op
    lose_penalty = -win_reward_scale * is_out
    draw_penalty = torch.where(progress_buf >= max_episode_length - 1, tmp_ones * draw_penalty_scale,
                               torch.zeros_like(reset, dtype=torch.float))

    # Dense shaping terms.
    move_reward = compute_move_reward(obs_buf[:, 0:2], pos_before,
                                      obs_buf_op[:, 0:2], dt,
                                      move_to_op_reward_scale)
    # stay_in_center_reward = stay_in_center_reward_scale * torch.exp(-torch.linalg.norm(obs_buf[:, :2], dim=-1))
    # Columns 13:21 hold the unscaled DOF positions (root state occupies the
    # first 13); values near 1 mean a joint is at its limit — TODO confirm slice.
    dof_at_limit_cost = torch.sum(obs_buf[:, 13:21] > 0.99, dim=-1) * joints_at_limit_cost_scale
    # Reward pushing the opponent away from the arena center.
    push_reward = -push_scale * torch.exp(-torch.linalg.norm(obs_buf_op[:, :2], dim=-1))
    action_cost_penalty = torch.sum(torch.square(torques), dim=1) * action_cost_scale
    # Penalize standing still (near-zero total torque).
    not_move_penalty = -10 * torch.exp(-torch.sum(torch.abs(torques), dim=1))
    dense_reward = move_reward + dof_at_limit_cost + push_reward + action_cost_penalty + not_move_penalty
    total_reward = win_reward + lose_penalty + draw_penalty + dense_reward * dense_reward_scale

    return total_reward, reset, hp, hp_op, is_out_op, is_out, progress_buf >= max_episode_length - 1


@torch.jit.script
def compute_ant_observations(
        root_states,
        root_states_op,
        dof_pos,
        dof_vel,
        dof_limits_lower,
        dof_limits_upper,
        dof_vel_scale,
        termination_height
):
    # type: (Tensor,Tensor,Tensor,Tensor,Tensor,Tensor,float,float)->Tensor
    # Observation layout: ego root state (13), normalized DOF positions,
    # scaled DOF velocities, opponent root pose (7), relative xy offset, and
    # two below-termination-height flags.
    dof_pos_scaled = unscale(dof_pos, dof_limits_lower, dof_limits_upper)
    obs = torch.cat(
        (root_states[:, :13], dof_pos_scaled, dof_vel * dof_vel_scale,
         root_states_op[:, :7],
         root_states[:, :2] - root_states_op[:, :2],
         torch.unsqueeze(root_states[:, 2] < termination_height, -1),
         torch.unsqueeze(root_states_op[:, 2] < termination_height, -1)),
        dim=-1)
    return obs


@torch.jit.script
def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor):
    # Compose two random rotations about the given x and y unit axes
    # (angles are rand0*pi and rand1*pi).
    return quat_mul(quat_from_angle_axis(rand0 * np.pi, x_unit_tensor),
                    quat_from_angle_axis(rand1 * np.pi, y_unit_tensor))


================================================
FILE: timechamber/tasks/ma_humanoid_strike.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from asyncio import shield
from dis import dis
import torch
import math

from isaacgym import gymapi, gymtorch
from isaacgym.torch_utils import *

import timechamber.tasks.ase_humanoid_base.humanoid_amp_task as humanoid_amp_task
from timechamber.utils import torch_utils


class HumanoidStrike(humanoid_amp_task.HumanoidAMPTask):
    """Two-humanoid sword-and-shield strike task on the AMP humanoid base."""

    def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
        super().__init__(cfg=cfg,
                         sim_params=sim_params,
                         physics_engine=physics_engine,
                         device_type=device_type,
                         device_id=device_id,
                         headless=headless)
        # Per-env accumulated contact-force "damage" dealt in each direction;
        # used to decide win/lose/draw at episode end.
        self.ego_to_op_damage = torch.zeros_like(self.reset_buf, device=self.device, dtype=torch.float)
        self.op_to_ego_damage = torch.zeros_like(self.reset_buf, device=self.device, dtype=torch.float)
        self._prev_root_pos = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float)
        self._prev_root_pos_op = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float)
        # Previous-step rigid-body velocities, snapshotted in post_physics_step
        # and consumed by the acceleration-based reward terms.
        self._prev_body_ang_vel = torch.zeros([self.num_envs, self.num_bodies, 3], device=self.device,
                                              dtype=torch.float32)
        self._prev_body_vel = torch.zeros([self.num_envs, self.num_bodies, 3], device=self.device,
                                          dtype=torch.float32)
        # Bodies that deal strikes and bodies whose contact forces count as damage.
        strike_body_names = cfg["env"]["strikeBodyNames"]
        self._strike_body_ids = self._build_body_ids_tensor(self.envs[0], self.humanoid_handles[0],
                                                            strike_body_names)
        force_body_names = cfg["env"]["forceBodies"]
        self._force_body_ids = self._build_body_ids_tensor(self.envs[0], self.humanoid_handles[0],
                                                            force_body_names)
        if self.viewer != None:
            # Draw the arena border and aim the camera at the fight area.
            for env in self.envs:
                self._add_rectangle_borderline(env)
            cam_pos = gymapi.Vec3(15.0, 0.0, 3.0)
            cam_target = gymapi.Vec3(10.0, 0.0, 0.0)
            self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)
        ###### Reward Definition ######
        ###### Reward Definition ######
        return

    def get_task_obs_size(self):
        # Width of the task-specific observation appended by _compute_task_obs.
        obs_size = 0
        if (self._enable_task_obs):
            obs_size = 50
        return obs_size

    def _create_envs(self, num_envs, spacing, num_per_row):
        super()._create_envs(num_envs, spacing, num_per_row)
        return

    def _build_env(self, env_id, env_ptr, humanoid_asset, humanoid_asset_op):
        super()._build_env(env_id, env_ptr, humanoid_asset, humanoid_asset_op)
        return

    def _build_body_ids_tensor(self, env_ptr, actor_handle, body_names):
        """Resolve rigid-body names to a long tensor of body indices.

        NOTE(review): the passed env_ptr/actor_handle are immediately
        overwritten with env 0's handles, so ids are always resolved on env 0.
        """
        env_ptr = self.envs[0]
        actor_handle = self.humanoid_handles[0]
        body_ids = []
        for body_name in body_names:
            body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name)
            assert (body_id != -1)
            body_ids.append(body_id)
        body_ids = to_torch(body_ids, device=self.device, dtype=torch.long)
        return body_ids

    def _reset_actors(self, env_ids):
        # Jitter the DOF state around the initial pose; both agents reuse the
        # same sampled noise.
        positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=self.device)
        velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=self.device)
        self._dof_pos[env_ids] = tensor_clamp(self._initial_dof_pos[env_ids] + positions,
                                              self.dof_limits_lower, self.dof_limits_upper)
        self._dof_vel[env_ids] = velocities
        self._dof_pos_op[env_ids] = tensor_clamp(self._initial_dof_pos[env_ids] + positions,
                                                 self.dof_limits_lower, self.dof_limits_upper)
        self._dof_vel_op[env_ids] = velocities
        agent_env_ids = expand_env_ids(env_ids, 2)
        # Spawn the two agents at opposite angles at a random radius.
        rand_angle = torch.rand((len(env_ids),), device=self.device) * math.pi * 2
        rand_pos = torch.ones((len(agent_env_ids), 2), device=self.device) * (
                self.borderline_space * torch.ones((len(agent_env_ids), 2), device=self.device) - torch.rand(
            (len(agent_env_ids), 2), device=self.device) * 2)
        rand_pos[0::2, 0] *=
torch.cos(rand_angle)
        rand_pos[0::2, 1] *= torch.sin(rand_angle)
        rand_pos[1::2, 0] *= torch.cos(rand_angle + math.pi)
        rand_pos[1::2, 1] *= torch.sin(rand_angle + math.pi)
        # Independent random yaw for each agent.
        rand_floats = torch_rand_float(-1.0, 1.0, (len(agent_env_ids), 3), device=self.device)
        rand_rotation = quat_from_angle_axis(rand_floats[:, 1] * np.pi, self.z_unit_tensor[agent_env_ids])
        rand_rotation2 = quat_from_angle_axis(rand_floats[:, 2] * np.pi, self.z_unit_tensor[agent_env_ids])
        self._humanoid_root_states[agent_env_ids] = self._initial_humanoid_root_states[agent_env_ids]
        self._humanoid_root_states[agent_env_ids, :2] = rand_pos
        self._humanoid_root_states[agent_env_ids[1::2], 3:7] = rand_rotation[1::2]
        self._humanoid_root_states[agent_env_ids[0::2], 3:7] = rand_rotation2[0::2]
        return

    def _reset_env_tensors(self, env_ids):
        super()._reset_env_tensors(env_ids)
        # Fresh episodes start with no accumulated damage.
        self.ego_to_op_damage[env_ids] = 0
        self.op_to_ego_damage[env_ids] = 0
        return

    def pre_physics_step(self, actions):
        super().pre_physics_step(actions)
        # self._prev_root_pos[:] = self._humanoid_root_states[self.humanoid_indices, 0:3]
        # self._prev_root_pos_op[:] = self._humanoid_root_states[self.humanoid_indices_op, 0:3]
        # self._prev_body_ang_vel[:] = self._rigid_body_ang_vel[]
        return

    def post_physics_step(self):
        super().post_physics_step()
        # Snapshot this step's body velocities for next step's acceleration-based
        # reward terms.
        self._prev_body_ang_vel[:] = self._rigid_body_ang_vel[:]
        self._prev_body_vel[:] = self._rigid_body_vel[:]

    def _compute_observations(self):
        # First half of obs_buf holds the ego agent, second half the opponent.
        obs, obs_op = self._compute_humanoid_obs()

        if (self._enable_task_obs):
            task_obs, task_obs_op = self._compute_task_obs()
            obs = torch.cat([obs, task_obs], dim=-1)
            obs_op = torch.cat([obs_op, task_obs_op], dim=-1)

        self.obs_buf[:self.num_envs] = obs
        self.obs_buf[self.num_envs:] = obs_op
        return

    def _compute_task_obs(self):
        body_pos = self._rigid_body_pos
        body_rot = self._rigid_body_rot
        body_vel = self._rigid_body_vel
        body_pos_op = self._rigid_body_pos_op
        body_rot_op = self._rigid_body_rot_op
        body_vel_op = self._rigid_body_vel_op
        # num_envs, 13
        root_states =
self._humanoid_root_states[self.humanoid_indices]
        root_states_op = self._humanoid_root_states[self.humanoid_indices_op]
        # Symmetric task observations: the same jit function with the two
        # agents' roles swapped.
        obs = compute_strike_observations(root_states, root_states_op,
                                          body_pos, body_rot,
                                          body_pos_op, body_vel_op,
                                          borderline=self.borderline_space)
        obs_op = compute_strike_observations(root_states=root_states_op,
                                             root_states_op=root_states,
                                             body_pos=body_pos_op,
                                             body_rot=body_rot_op,
                                             body_pos_op=body_pos,
                                             body_vel_op=body_vel,
                                             borderline=self.borderline_space)
        return obs, obs_op

    def _compute_reward(self, actions):
        root_states = self._humanoid_root_states[self.humanoid_indices]
        root_states_op = self._humanoid_root_states[self.humanoid_indices_op]
        body_pos = self._rigid_body_pos
        body_vel = self._rigid_body_vel
        prev_body_vel = self._prev_body_vel
        body_ang_vel = self._rigid_body_ang_vel
        prev_body_ang_vel = self._prev_body_ang_vel
        contact_force = self._contact_forces
        body_pos_op = self._rigid_body_pos_op
        contact_force_op = self._contact_forces_op
        self.rew_buf[:], force_ego_to_op, force_op_to_ego = compute_strike_reward(
            root_states=root_states,
            root_states_op=root_states_op,
            body_pos=body_pos,
            body_ang_vel=body_ang_vel,
            prev_body_ang_vel=prev_body_ang_vel,
            body_vel=body_vel,
            prev_body_vel=prev_body_vel,
            body_pos_op=body_pos_op,
            force_body_ids=self._force_body_ids,
            strike_body_ids=self._strike_body_ids,
            contact_force=contact_force,
            contact_force_op=contact_force_op,
            contact_body_ids=self._contact_body_ids,
            borderline=self.borderline_space,
            termination_heights=self._termination_heights,
            dt=self.dt)
        # Accumulate per-step impact forces into the episode damage totals used
        # for the win/lose/draw decision.
        self.ego_to_op_damage += force_ego_to_op
        self.op_to_ego_damage += force_op_to_ego
        return

    def _compute_reset(self):
        self.reset_buf[:], self._terminate_buf[:], \
            self.extras['win'], self.extras['lose'], self.extras['draw'] = \
            compute_humanoid_reset(self.reset_buf, self.progress_buf,
                                   self.ego_to_op_damage, self.op_to_ego_damage,
                                   self._contact_forces, self._contact_forces_op,
                                   self._contact_body_ids,
                                   self._rigid_body_pos, self._rigid_body_pos_op,
                                   self.max_episode_length, self._enable_early_termination,
                                   self._termination_heights, self.borderline_space)
        return


#####################################################################
###=========================jit functions=========================###
#####################################################################


@torch.jit.script
def compute_strike_observations(root_states, root_states_op, body_pos, body_rot, body_pos_op, body_vel_op,
                                borderline,
                                ):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor,Tensor,float) -> Tensor
    # Task observation for one agent: distances to the square arena border,
    # the opponent's pose/velocity in the ego heading frame, and relative
    # sword/shield/body positions.  Hard-coded body indices (6 = sword,
    # 9 = shield, 1 = torso, 2 = head, ...) presumably follow the
    # sword-and-shield humanoid asset's body ordering — TODO confirm against
    # the MJCF.
    root_pos = root_states[:, 0:3]
    root_rot = root_states[:, 3:7]
    ego_sword_pos = body_pos[:, 6, :]
    ego_sword_rot = body_rot[:, 6, :]
    ego_shield_pos = body_pos[:, 9, :]
    ego_shield_rot = body_rot[:, 9, :]

    root_pos_op = root_states_op[:, 0:3]
    root_rot_op = root_states_op[:, 3:7]
    root_vel_op = root_states_op[:, 7:10]
    root_ang_op = root_states_op[:, 10:13]
    op_sword_pos = body_pos_op[:, 6, :]
    op_sword_vel = body_vel_op[:, 6, :]
    op_torso_pos = body_pos_op[:, 1, :]
    op_torso_vel = body_vel_op[:, 1, :]
    op_head_pos = body_pos_op[:, 2, :]
    op_head_vel = body_vel_op[:, 2, :]
    op_right_upper_arm_pos = body_pos_op[:, 3, :]
    op_right_thigh_pos = body_pos_op[:, 11, :]
    op_left_thigh_pos = body_pos_op[:, 14, :]
    ##*******************************************************##
    # Distance to the nearest border along each axis (square arena of
    # half-width `borderline`); negative values mean out of bounds.
    relative_x_1 = borderline - root_pos[:, 0]
    relative_x_2 = root_pos[:, 0] + borderline
    relative_x = torch.minimum(relative_x_1, relative_x_2)
    relative_x = torch.unsqueeze(relative_x, -1)
    relative_y_1 = borderline - root_pos[:, 1]
    relative_y_2 = root_pos[:, 1] + borderline
    relative_y = torch.minimum(relative_y_1, relative_y_2)
    relative_y = torch.unsqueeze(relative_y, -1)
    ##*******************************************************##
    # Inverse heading rotations used to express quantities in local frames.
    heading_rot = torch_utils.calc_heading_quat_inv(root_rot)
    sword_rot = torch_utils.calc_heading_quat_inv(ego_sword_rot)
    shield_rot = torch_utils.calc_heading_quat_inv(ego_shield_rot)
    local_op_relative_pos = root_pos_op - root_pos
    local_op_relative_pos[..., -1] =
root_pos_op[..., -1]
    local_op_relative_pos = quat_rotate(heading_rot, local_op_relative_pos)
    local_op_vel = quat_rotate(heading_rot, root_vel_op)
    local_op_ang_vel = quat_rotate(heading_rot, root_ang_op)
    local_op_rot = quat_mul(heading_rot, root_rot_op)
    local_op_rot_obs = torch_utils.quat_to_tan_norm(local_op_rot)
    ##*******************************************************##
    # op sword relative ego position and vel
    local_op_relative_sword_pos = op_sword_pos - root_pos
    local_op_relative_sword_pos = quat_rotate(heading_rot, local_op_relative_sword_pos)
    local_op_sword_vel = quat_rotate(heading_rot, op_sword_vel)
    # op sword relative ego shield position and vel
    local_op_sword_shield_pos = op_sword_pos - ego_shield_pos
    local_op_sword_shield_pos = quat_rotate(shield_rot, local_op_sword_shield_pos)
    local_op_sword_shield_vel = quat_rotate(shield_rot, op_sword_vel)
    # relative position and vel of ego sword and op up body
    relative_sword_torso_pos = op_torso_pos - ego_sword_pos
    relative_sword_torso_pos = quat_rotate(sword_rot, relative_sword_torso_pos)
    relative_sword_torso_vel = quat_rotate(sword_rot, op_torso_vel)
    relative_sword_head_pos = op_head_pos - ego_sword_pos
    relative_sword_head_pos = quat_rotate(sword_rot, relative_sword_head_pos)
    relative_sword_head_vel = quat_rotate(sword_rot, op_head_vel)
    relative_sword_right_arm_pos = op_right_upper_arm_pos - ego_sword_pos
    relative_sword_right_arm_pos = quat_rotate(sword_rot, relative_sword_right_arm_pos)
    relative_sword_right_thigh_pos = op_right_thigh_pos - ego_sword_pos
    relative_sword_right_thigh_pos = quat_rotate(sword_rot, relative_sword_right_thigh_pos)
    relative_sword_left_thigh_pos = op_left_thigh_pos - ego_sword_pos
    relative_sword_left_thigh_pos = quat_rotate(sword_rot, relative_sword_left_thigh_pos)
    # Concatenate all task features into a single per-env observation row.
    obs = torch.cat([relative_x, relative_y, local_op_relative_pos, local_op_rot_obs,
                     local_op_vel, local_op_ang_vel,
                     local_op_relative_sword_pos, local_op_sword_vel,
                     local_op_sword_shield_pos, local_op_sword_shield_vel,
                     relative_sword_torso_pos, relative_sword_torso_vel,
                     relative_sword_head_pos, relative_sword_head_vel,
                     relative_sword_right_arm_pos,
                     relative_sword_right_thigh_pos,
                     relative_sword_left_thigh_pos
                     ], dim=-1)
    return obs


@torch.jit.script
def compute_strike_reward(root_states, root_states_op, body_pos, body_ang_vel, prev_body_ang_vel, body_vel,
                          prev_body_vel, body_pos_op, force_body_ids, strike_body_ids, contact_force,
                          contact_force_op, contact_body_ids, borderline, termination_heights, dt):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor,Tensor,Tensor,Tensor,float, Tensor, float) -> Tuple[Tensor, Tensor,Tensor]
    # Per-term reward weights (see the weighted sum at the end).
    op_fall_reward_w = 200.0
    ego_fall_out_reward_w = 50.0
    shield_to_sword_pos_reward_w = 1.0
    damage_reward_w = 8.0
    sword_to_op_reward_w = 0.8
    reward_energy_w = 3.0
    reward_strike_vel_acc_w = 3.0
    reward_face_w = 4.0
    reward_foot_to_op_w = 10.0
    reward_kick_w = 2.0
    num_envs = root_states.shape[0]
    reward = torch.zeros((num_envs, 1), dtype=torch.float32)
    root_xy_pos = root_states[:, 0:2]
    root_pos = root_states[:, 0:3]
    # Hard-coded body indices (6 = sword, 9 = shield, 13 = right foot, ...)
    # presumably follow the sword-and-shield asset — TODO confirm.
    ego_sword_pos = body_pos[:, 6, 0:3]
    ego_shield_pos = body_pos[:, 9, 0:3]
    ego_right_foot_pos = body_pos[:, 13, 0:3]
    op_sword_pos = body_pos_op[:, 6, 0:3]
    op_torse_pos = body_pos_op[:, 1, 0:3]
    op_right_thigh_pos = body_pos_op[:, 11, 0:3]
    op_left_thigh_pos = body_pos_op[:, 14, 0:3]
    root_pos_xy_op = root_states_op[:, 0:2]
    root_pos_xy = root_states[:, 0:2]
    root_pos_op = root_states_op[:, 0:3]
    root_rot = root_states[:, 3:7]
    root_rot_op = root_states_op[:, 3:7]
    # World up-vector, used below to measure how upright the opponent is.
    up = torch.zeros_like(root_pos_op)
    up[..., -1] = 1
    contact_buf = contact_force.clone()
    contact_buf_op = contact_force_op.clone()
    ##*****************r energy******************##
    # Reward swinging the strike bodies fast while close to the opponent.
    strike_body_vel = body_vel[:, strike_body_ids, :]
    strike_body_vel_norm = torch.sum(torch.norm(strike_body_vel, dim=-1), dim=1)
    strike_body_vel_norm = torch.clamp(strike_body_vel_norm, max=20)
    distance = root_pos_xy_op - root_xy_pos
    distance = torch.norm(distance, dim=-1)
    zeros =
torch.zeros_like(distance)
    # k_dist ~ 1 within 2 m of the opponent, decays fast beyond that; it gates
    # the energy and acceleration terms on proximity.
    k_dist = torch.exp(-10 * torch.maximum(zeros, distance - 2.0))
    r_energy = k_dist * strike_body_vel_norm
    r_energy = r_energy / 20.
    # Strike-body acceleration (finite difference of velocity over dt).
    strike_vel_dfff = body_vel[:, strike_body_ids, :] - prev_body_vel[:, strike_body_ids, :]
    strike_vel_acc = strike_vel_dfff / dt
    strike_vel_acc = torch.sum(torch.norm(strike_vel_acc, dim=-1), dim=1)
    strike_vel_acc = torch.clamp(strike_vel_acc, max=1000)
    strike_vel_acc = k_dist * strike_vel_acc / 500
    r_strike_vel_acc = strike_vel_acc
    ##*****************r damage******************##
    # Net impact force on the force bodies: force dealt minus doubled force
    # received, clamped below and rescaled.
    ego_to_op_force = contact_buf_op[:, force_body_ids, :]
    op_to_ego_force = contact_buf[:, force_body_ids, :]
    force_ego_to_op = torch.norm(ego_to_op_force, dim=2).sum(dim=1)
    force_op_to_ego = torch.norm(op_to_ego_force, dim=2).sum(dim=1)
    r_damage = force_ego_to_op - force_op_to_ego * 2
    r_damage = torch.clamp(r_damage, min=-200.)
    r_damage /= 100
    ##*****************r kick******************##
    # Encourage bringing the right foot close to the opponent's torso, with a
    # full reward once within 0.1 m.
    ego_foot_op_torse_distance = op_torse_pos - ego_right_foot_pos
    ego_foot_op_torse_err = torch.norm(ego_foot_op_torse_distance, dim=-1)
    succ_foot = ego_foot_op_torse_err < 0.1
    r_foot_to_op = torch.exp(-0.5 * ego_foot_op_torse_err)
    constant_r = torch.ones_like(r_foot_to_op)
    r_foot_to_op = torch.where(succ_foot, constant_r, r_foot_to_op)
    # Reward lifting the right foot (capped at 1 once above 0.4 m).
    foot_height = ego_right_foot_pos[..., 2]
    succ_kick = foot_height >= 0.4
    zeros = torch.zeros_like(succ_kick)
    constant_r_kick = torch.ones_like(succ_kick)
    r_kick = torch.where(succ_kick, constant_r_kick, foot_height)
    ##*****************r close******************##
    # sword -> torso
    pos_err_scale1 = 1.0
    pos_err_scale2 = 2.0
    sword_torse_distance = op_torse_pos - ego_sword_pos
    sword_torse_err = torch.sum(sword_torse_distance * sword_torse_distance, dim=-1)
    sword_right_thigh_distance = op_right_thigh_pos - ego_sword_pos
    sword_right_thigh_err = torch.sum(sword_right_thigh_distance * sword_right_thigh_distance, dim=-1)
    sword_left_thigh_distance = op_left_thigh_pos - ego_sword_pos
    sword_left_thigh_err =
torch.sum(sword_left_thigh_distance * sword_left_thigh_distance, dim=-1)
    sword_sword_distance = op_sword_pos - ego_sword_pos
    sword_sword_err = torch.sum(sword_sword_distance * sword_sword_distance, dim=-1)
    # zeros = torch.zeros_like(sword_torse_distance)
    # Gaussian-shaped closeness rewards for sword vs. several opponent bodies.
    r_close = torch.exp(-pos_err_scale1 * sword_torse_err)  # -> [0, 1]
    r_close += torch.exp(-pos_err_scale1 * sword_right_thigh_err)
    r_close += torch.exp(-pos_err_scale1 * sword_left_thigh_err)
    r_close += torch.exp(-pos_err_scale2 * sword_sword_err)
    ##*****************r shelid with op sword******************##
    # Keep the ego shield close to the opponent's sword (blocking posture).
    pos_err_scale3 = 2.0
    ego_shield_op_sword_distance = op_sword_pos - ego_shield_pos
    ego_shield_op_sword_err = torch.sum(ego_shield_op_sword_distance * ego_shield_op_sword_distance, dim=-1)
    r_shield_to_sword = torch.exp(-pos_err_scale3 * ego_shield_op_sword_err)
    ##*****************r face******************##
    # Reward facing the opponent: dot product of the heading direction with
    # the unit vector toward the opponent, clipped at zero.
    tar_dir = root_pos_xy_op - root_xy_pos
    tar_dir = torch.nn.functional.normalize(tar_dir, dim=-1)
    heading_rot = torch_utils.calc_heading_quat(root_rot)
    facing_dir = torch.zeros_like(root_pos)
    facing_dir[..., 0] = 1.0
    facing_dir = quat_rotate(heading_rot, facing_dir)
    facing_err = torch.sum(tar_dir * facing_dir[..., 0:2], dim=-1)
    facing_reward = torch.clamp_min(facing_err, 0.0)
    ##*****************r op fall******************##
    # Opponent fall detection: a non-contact body experiences contact force
    # while some non-contact body is below its termination height.
    masked_contact_buf_op = contact_buf_op.clone()
    masked_contact_buf_op[:, contact_body_ids, :] = 0
    fall_contact_op = torch.any(torch.abs(masked_contact_buf_op) > 0.1, dim=-1)
    fall_contact_op = torch.any(fall_contact_op, dim=-1)
    body_height_op = body_pos_op[..., 2]
    fall_height_op = body_height_op < termination_heights
    fall_height_op[:, contact_body_ids] = False
    fall_height_op = torch.any(fall_height_op, dim=-1)
    has_fallen_op = torch.logical_and(fall_contact_op, fall_height_op)
    # Reward tilting the opponent away from upright (full reward once fallen).
    op_up = quat_rotate(root_rot_op, up)
    op_rot_err = torch.sum(up * op_up, dim=-1)
    op_rot_r = 0.6 * torch.clamp_min(1.0 - op_rot_err, 0.0)  # -> [0, 1]
    succ = op_rot_err < 0.2
    op_rot_r = torch.where(has_fallen_op,
                           torch.ones_like(op_rot_r), op_rot_r)
    # test, when op fall, then r_close = 0 to encourage to agents separate.
    r_separate = torch.norm((root_pos_xy_op - root_pos_xy), dim=-1)
    r_separate = torch.where(r_separate > 0.1, r_separate, torch.zeros_like(r_separate))
    r_close = torch.where(has_fallen_op, r_separate, r_close)
    r_shield_to_sword = torch.where(has_fallen_op, torch.zeros_like(r_shield_to_sword), r_shield_to_sword)
    ##*****************r penalty******************##
    # Penalize leaving the square arena (negative border distance on either axis).
    relative_x_1 = borderline - root_xy_pos[:, 0]
    relative_x_2 = root_xy_pos[:, 0] + borderline
    relative_x = torch.minimum(relative_x_1, relative_x_2)
    relative_x = relative_x < 0
    relative_y_1 = borderline - root_xy_pos[:, 1]
    relative_y_2 = root_xy_pos[:, 1] + borderline
    relative_y = torch.minimum(relative_y_1, relative_y_2)
    relative_y = relative_y < 0
    is_out = relative_x | relative_y
    r_penalty = is_out * 1.0
    # Also penalize the ego agent falling (same detection as for the opponent).
    masked_contact_buf = contact_force.clone()
    masked_contact_buf[:, contact_body_ids, :] = 0
    fall_contact = torch.any(torch.abs(masked_contact_buf) > 0.1, dim=-1)
    fall_contact = torch.any(fall_contact, dim=-1)
    body_height = body_pos[..., 2]
    fall_height = body_height < termination_heights
    fall_height[:, contact_body_ids] = False
    fall_height = torch.any(fall_height, dim=-1)
    has_fallen_ego = torch.logical_and(fall_contact, fall_height)
    r_penalty += has_fallen_ego * 1.0
    ##*****************r penalty******************##
    # Weighted sum of all reward terms.
    reward = -r_penalty * ego_fall_out_reward_w + op_rot_r * op_fall_reward_w + \
             r_shield_to_sword * shield_to_sword_pos_reward_w + r_close * sword_to_op_reward_w + \
             r_damage * damage_reward_w + r_energy * reward_energy_w + facing_reward * reward_face_w + \
             r_strike_vel_acc * reward_strike_vel_acc_w + r_foot_to_op * reward_foot_to_op_w + \
             r_kick * reward_kick_w
    return reward, force_ego_to_op, force_op_to_ego


@torch.jit.script
def compute_humanoid_reset(reset_buf, progress_buf, ego_to_op_damage, op_to_ego_damage, contact_buf,
                           contact_buf_op, contact_body_ids, rigid_body_pos, rigid_body_pos_op,
                           max_episode_length, enable_early_termination, termination_heights, borderline):
    # type: (Tensor, Tensor, Tensor, Tensor,Tensor, Tensor, Tensor, Tensor, Tensor, float, bool, Tensor, float) -> Tuple[Tensor, Tensor,Tensor,Tensor,Tensor]
    # Decide per-env episode termination and, at reset time, the win/lose/draw
    # outcome from the accumulated damage totals.
    terminated = torch.zeros_like(reset_buf)
    if (enable_early_termination):
        masked_contact_buf = contact_buf.clone()
        masked_contact_buf_op = contact_buf_op.clone()
        masked_contact_buf[:, contact_body_ids, :] = 0
        masked_contact_buf_op[:, contact_body_ids, :] = 0
        # Fall detection for both agents (contact on a non-contact body plus
        # a body below its termination height).
        fall_contact = torch.any(torch.abs(masked_contact_buf) > 0.1, dim=-1)
        fall_contact = torch.any(fall_contact, dim=-1)
        fall_contact_op = torch.any(torch.abs(masked_contact_buf_op) > 0.1, dim=-1)
        fall_contact_op = torch.any(fall_contact_op, dim=-1)

        body_height = rigid_body_pos[..., 2]
        body_height_op = rigid_body_pos_op[..., 2]
        fall_height = body_height < termination_heights
        fall_height_op = body_height_op < termination_heights
        fall_height[:, contact_body_ids] = False
        fall_height_op[:, contact_body_ids] = False
        fall_height = torch.any(fall_height, dim=-1)
        fall_height_op = torch.any(fall_height_op, dim=-1)

        ## out area
        # Either agent's root leaving the square arena of half-width
        # `borderline` ends the episode.
        root_pos = rigid_body_pos[:, 0, 0:2]
        root_pos_op = rigid_body_pos_op[:, 0, 0:2]
        relative_x_1 = borderline - root_pos[:, 0]
        relative_x_2 = root_pos[:, 0] + borderline
        relative_x = torch.minimum(relative_x_1, relative_x_2)
        relative_x = relative_x < 0
        relative_y_1 = borderline - root_pos[:, 1]
        relative_y_2 = root_pos[:, 1] + borderline
        relative_y = torch.minimum(relative_y_1, relative_y_2)
        relative_y = relative_y < 0
        is_out_ego = relative_x | relative_y

        relative_x_1_op = borderline - root_pos_op[:, 0]
        relative_x_2_op = root_pos_op[:, 0] + borderline
        relative_x_op = torch.minimum(relative_x_1_op, relative_x_2_op)
        relative_x_op = relative_x_op < 0
        relative_y_1_op = borderline - root_pos_op[:, 1]
        relative_y_2_op = root_pos_op[:, 1] + borderline
        relative_y_op = torch.minimum(relative_y_1_op, relative_y_2_op)
        relative_y_op = relative_y_op < 0
        is_out_op = relative_x_op | relative_y_op
        is_out = is_out_ego | is_out_op
        has_failed = is_out
        # first timestep can sometimes still have nonzero contact forces
        # so only check after first couple of steps
        has_failed *= (progress_buf > 1)
        terminated = torch.where(has_failed, torch.ones_like(reset_buf), terminated)
    # NOTE(review): the fall_contact/fall_height flags computed above are not
    # folded into has_failed — only leaving the arena triggers early
    # termination here.
    damage_ego_more_than_op = ego_to_op_damage > op_to_ego_damage
    damage_op_more_than_ego = op_to_ego_damage > ego_to_op_damage
    # Reset at early termination or at the episode-length cap; outcome flags
    # are only raised on resetting envs.
    reset = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(reset_buf), terminated)
    win = torch.where(reset, damage_ego_more_than_op, torch.zeros_like(reset_buf, dtype=torch.bool))
    lose = torch.where(reset, damage_op_more_than_ego, torch.zeros_like(reset_buf, dtype=torch.bool))
    draw = torch.where(reset, ego_to_op_damage == op_to_ego_damage,
                       torch.zeros_like(reset_buf, dtype=torch.bool))
    return reset, terminated, win, lose, draw


@torch.jit.script
def expand_env_ids(env_ids, n_agents):
    # type: (Tensor, int) -> Tensor
    # Map per-env indices to per-agent actor indices: env i -> i*n_agents + k.
    device = env_ids.device
    agent_env_ids = torch.zeros((n_agents * len(env_ids)), device=device, dtype=torch.long)
    for idx in range(n_agents):
        agent_env_ids[idx::n_agents] = env_ids * n_agents + idx
    return agent_env_ids


================================================
FILE: timechamber/train.py
================================================
# train.py
# Script to train policies in Isaac Gym
#
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3.
# Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import datetime
from statistics import mode

# isaacgym must be imported before torch-dependent modules.
import isaacgym
import os
import hydra
import yaml

from omegaconf import DictConfig, OmegaConf
from hydra.utils import to_absolute_path
import gym

from timechamber.utils.reformat import omegaconf_to_dict, print_dict
from timechamber.utils.utils import set_np_formatting, set_seed
from timechamber.utils.rlgames_utils import RLGPUEnv, RLGPUAlgoObserver, get_rlgames_env_creator
from rl_games.common import env_configurations, vecenv
from rl_games.torch_runner import Runner
from rl_games.algos_torch import model_builder
from timechamber.ase import ase_agent
from timechamber.ase import ase_models
from timechamber.ase import ase_network_builder
from timechamber.ase import hrl_models
from timechamber.ase import hrl_network_builder
from timechamber.learning import ppo_sp_agent
from timechamber.learning import hrl_sp_agent
from timechamber.learning import ppo_sp_player
from timechamber.learning import hrl_sp_player
from timechamber.learning import vectorized_models
from
@hydra.main(config_name="config", config_path="./cfg")
def launch_rlg_hydra(cfg: DictConfig):
    """Hydra entry point: builds the Isaac Gym env, registers TimeChamber's
    self-play/ASE/HRL agents and networks with rl_games, then trains or plays.

    Reads everything from the hydra config (see ./cfg/config.yaml); returns None.
    """
    time_str = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    run_name = f"{cfg.wandb_name}_{time_str}"

    # ensure checkpoints can be specified as relative paths
    # (hydra changes the CWD, so relative paths must be resolved up front)
    if cfg.checkpoint:
        cfg.checkpoint = to_absolute_path(cfg.checkpoint)

    cfg_dict = omegaconf_to_dict(cfg)
    print_dict(cfg_dict)

    # set numpy formatting for printing only
    set_np_formatting()

    rank = int(os.getenv("LOCAL_RANK", "0"))
    if cfg.multi_gpu:
        # torchrun --standalone --nnodes=1 --nproc_per_node=2 train.py
        # each torchrun worker pins both sim and RL to its own GPU
        cfg.sim_device = f'cuda:{rank}'
        cfg.rl_device = f'cuda:{rank}'

    # sets seed. if seed is -1 will pick a random one
    # rank offset keeps per-worker seeds distinct under multi-GPU
    cfg.seed += rank
    cfg.seed = set_seed(cfg.seed, torch_deterministic=cfg.torch_deterministic, rank=rank)

    if cfg.wandb_activate and rank == 0:
        # Make sure to install WandB if you actually use this.
        import wandb

        run = wandb.init(
            project=cfg.wandb_project,
            group=cfg.wandb_group,
            entity=cfg.wandb_entity,
            config=cfg_dict,
            sync_tensorboard=True,
            name=run_name,
            resume="allow",
        )

    def create_env_thunk(**kwargs):
        # Deferred env construction; called by rl_games through the
        # 'rlgpu' env_configurations entry registered below.
        envs = timechamber.make(
            cfg.seed,
            cfg.task_name,
            cfg.task.env.numEnvs,
            cfg.sim_device,
            cfg.rl_device,
            cfg.graphics_device_id,
            cfg.device_type,
            cfg.headless,
            cfg.multi_gpu,
            cfg.capture_video,
            cfg.force_render,
            cfg,
            **kwargs,
        )
        if cfg.capture_video:
            envs.is_vector_env = True
            envs = gym.wrappers.RecordVideo(
                envs,
                f"videos/{run_name}",
                step_trigger=lambda step: step % cfg.capture_video_freq == 0,
                video_length=cfg.capture_video_len,
            )
        return envs

    # register the rl-games adapter to use inside the runner
    vecenv.register('RLGPU',
                    lambda config_name, num_actors, **kwargs: RLGPUEnv(config_name, num_actors, **kwargs))
    env_configurations.register('rlgpu', {
        'vecenv_type': 'RLGPU',
        'env_creator': create_env_thunk,
    })

    # register new AMP network builder and agent
    def build_runner(algo_observer):
        # Wires TimeChamber's agents/players (algo+player factories) and
        # model/network builders (global model_builder registry) into rl_games.
        runner = Runner(algo_observer)
        runner.algo_factory.register_builder('self_play_continuous',
                                             lambda **kwargs: ppo_sp_agent.SPAgent(**kwargs))
        runner.algo_factory.register_builder('self_play_hrl',
                                             lambda **kwargs: hrl_sp_agent.HRLSPAgent(**kwargs))
        runner.algo_factory.register_builder('ase', lambda **kwargs: ase_agent.ASEAgent(**kwargs))
        runner.player_factory.register_builder('self_play_continuous',
                                               lambda **kwargs: ppo_sp_player.SPPlayer(**kwargs))
        runner.player_factory.register_builder('self_play_hrl',
                                               lambda **kwargs: hrl_sp_player.HRLSPPlayer(**kwargs))
        # runner.
        model_builder.register_model('hrl', lambda network, **kwargs: hrl_models.ModelHRLContinuous(network))
        model_builder.register_model('ase', lambda network, **kwargs: ase_models.ModelASEContinuous(network))
        model_builder.register_model('vectorized_a2c',
                                     lambda network, **kwargs: vectorized_models.ModelVectorizedA2C(network))
        model_builder.register_network('vectorized_a2c',
                                       lambda **kwargs: vectorized_network_builder.VectorizedA2CBuilder())
        model_builder.register_network('ase', lambda **kwargs: ase_network_builder.ASEBuilder())
        model_builder.register_network('hrl', lambda **kwargs: hrl_network_builder.HRLBuilder())
        return runner

    rlg_config_dict = omegaconf_to_dict(cfg.train)  # convert CLI arguments into dictionory

    # create runner and set the settings
    runner = build_runner(RLGPUAlgoObserver())
    runner.load(rlg_config_dict)
    runner.reset()

    # dump config dict
    experiment_dir = os.path.join('runs', cfg.train.params.config.name)
    os.makedirs(experiment_dir, exist_ok=True)
    with open(os.path.join(experiment_dir, 'config.yaml'), 'w') as f:
        f.write(OmegaConf.to_yaml(cfg))

    if cfg.multi_gpu:
        # NOTE(review): rank is re-derived via horovod here while the earlier
        # rank came from LOCAL_RANK (torchrun) — confirm both paths agree.
        import horovod.torch as hvd

        rank = hvd.rank()
    else:
        rank = 0

    if cfg.wandb_activate and rank == 0:
        # Make sure to install WandB if you actually use this.
        # NOTE(review): wandb.init is called a second time here (first call above
        # used name=run_name, this one uses id=run_name) — confirm intended.
        import wandb

        wandb.init(
            project=cfg.wandb_project,
            group=cfg.wandb_group,
            entity=cfg.wandb_entity,
            config=cfg_dict,
            sync_tensorboard=True,
            id=run_name,
            resume="allow",
            monitor_gym=True,
        )

    runner.run({
        'train': not cfg.test,
        'play': cfg.test,
        'checkpoint': cfg.checkpoint,
        'sigma': None
    })

    if cfg.wandb_activate and rank == 0:
        wandb.finish()


if __name__ == "__main__":
    launch_rlg_hydra()
import wandb wandb.init( project=cfg.wandb_project, group=cfg.wandb_group, entity=cfg.wandb_entity, config=cfg_dict, sync_tensorboard=True, id=run_name, resume="allow", monitor_gym=True, ) runner.run({ 'train': not cfg.test, 'play': cfg.test, 'checkpoint': cfg.checkpoint, 'sigma': None }) if cfg.wandb_activate and rank == 0: wandb.finish() if __name__ == "__main__": launch_rlg_hydra() ================================================ FILE: timechamber/utils/config.py ================================================ import os import sys import yaml from isaacgym import gymapi from isaacgym import gymutil import numpy as np import random import torch SIM_TIMESTEP = 1.0 / 60.0 def parse_sim_params(args, cfg): # initialize sim sim_params = gymapi.SimParams() sim_params.dt = SIM_TIMESTEP sim_params.num_client_threads = args.num_subscenes if args.physics_engine == "flex": if args.device_type != "cpu": print("WARNING: Using Flex with GPU instead of PHYSX!") sim_params.flex.shape_collision_margin = 0.01 sim_params.flex.num_outer_iterations = 4 sim_params.flex.num_inner_iterations = 10 elif args.physics_engine == "physx": sim_params.physx.solver_type = 1 sim_params.physx.num_position_iterations = 4 sim_params.physx.num_velocity_iterations = 0 sim_params.physx.num_threads = 4 sim_params.physx.use_gpu = args.use_gpu sim_params.physx.num_subscenes = args.num_subscenes sim_params.physx.max_gpu_contact_pairs = 8 * 1024 * 1024 sim_params.use_gpu_pipeline = args.use_gpu_pipeline sim_params.physx.use_gpu = args.use_gpu # if sim options are provided in cfg, parse them and update/override above: if "sim" in cfg: gymutil.parse_sim_config(cfg["sim"], sim_params) # Override num_threads if passed on the command line if args.physics_engine == "physx" and args.num_threads > 0: sim_params.physx.num_threads = args.num_threads return sim_params ================================================ FILE: timechamber/utils/gym_util.py ================================================ # Copyright (c) 
2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from timechamber.utils import logger from isaacgym import gymapi import numpy as np import torch from isaacgym.torch_utils import * from isaacgym import gymtorch def setup_gym_viewer(config): gym = initialize_gym(config) sim, viewer = configure_gym(gym, config) return gym, sim, viewer def initialize_gym(config): gym = gymapi.acquire_gym() if not gym.initialize(): logger.warn("*** Failed to initialize gym") quit() return gym def configure_gym(gym, config): engine, render = config['engine'], config['render'] # physics engine settings if(engine == 'FLEX'): sim_engine = gymapi.SIM_FLEX elif(engine == 'PHYSX'): sim_engine = gymapi.SIM_PHYSX else: logger.warn("Uknown physics engine. defaulting to FLEX") sim_engine = gymapi.SIM_FLEX # gym viewer if render: # create viewer sim = gym.create_sim(0, 0, sim_type=sim_engine) viewer = gym.create_viewer( sim, int(gymapi.DEFAULT_VIEWER_WIDTH / 1.25), int(gymapi.DEFAULT_VIEWER_HEIGHT / 1.25) ) if viewer is None: logger.warn("*** Failed to create viewer") quit() # enable left mouse click or space bar for throwing projectiles if config['add_projectiles']: gym.subscribe_viewer_mouse_event(viewer, gymapi.MOUSE_LEFT_BUTTON, "shoot") gym.subscribe_viewer_keyboard_event(viewer, gymapi.KEY_SPACE, "shoot") else: sim = gym.create_sim(0, -1) viewer = None # simulation params scene_config = config['env']['scene'] sim_params = gymapi.SimParams() sim_params.solver_type = scene_config['SolverType'] sim_params.num_outer_iterations = scene_config['NumIterations'] sim_params.num_inner_iterations = scene_config['NumInnerIterations'] sim_params.relaxation = scene_config.get('Relaxation', 0.75) sim_params.warm_start = scene_config.get('WarmStart', 0.25) sim_params.geometric_stiffness = scene_config.get('GeometricStiffness', 1.0) sim_params.shape_collision_margin = 0.01 sim_params.gravity = gymapi.Vec3(0.0, -9.8, 0.0) gym.set_sim_params(sim, sim_params) return sim, viewer def parse_states_from_reference_states(reference_states, progress): # parse 
reference states from DeepMimicState global_quats_ref = torch.tensor( reference_states._global_rotation[(progress,)].numpy(), dtype=torch.double ).cuda() ts_ref = torch.tensor( reference_states._translation[(progress,)].numpy(), dtype=torch.double ).cuda() vels_ref = torch.tensor( reference_states._velocity[(progress,)].numpy(), dtype=torch.double ).cuda() avels_ref = torch.tensor( reference_states._angular_velocity[(progress,)].numpy(), dtype=torch.double ).cuda() return global_quats_ref, ts_ref, vels_ref, avels_ref def parse_states_from_reference_states_with_motion_id(precomputed_state, progress, motion_id): assert len(progress) == len(motion_id) # get the global id global_id = precomputed_state['motion_offset'][motion_id] + progress global_id = np.minimum(global_id, precomputed_state['global_quats_ref'].shape[0] - 1) # parse reference states from DeepMimicState global_quats_ref = precomputed_state['global_quats_ref'][global_id] ts_ref = precomputed_state['ts_ref'][global_id] vels_ref = precomputed_state['vels_ref'][global_id] avels_ref = precomputed_state['avels_ref'][global_id] return global_quats_ref, ts_ref, vels_ref, avels_ref def parse_dof_state_with_motion_id(precomputed_state, dof_state, progress, motion_id): assert len(progress) == len(motion_id) # get the global id global_id = precomputed_state['motion_offset'][motion_id] + progress # NOTE: it should never reach the dof_state.shape, cause the episode is # terminated 2 steps before global_id = np.minimum(global_id, dof_state.shape[0] - 1) # parse reference states from DeepMimicState return dof_state[global_id] def get_flatten_ids(precomputed_state): motion_offsets = precomputed_state['motion_offset'] init_state_id, init_motion_id, global_id = [], [], [] for i_motion in range(len(motion_offsets) - 1): i_length = motion_offsets[i_motion + 1] - motion_offsets[i_motion] init_state_id.extend(range(i_length)) init_motion_id.extend([i_motion] * i_length) if len(global_id) == 0: global_id.extend(range(0, 
i_length)) else: global_id.extend(range(global_id[-1] + 1, global_id[-1] + i_length + 1)) return np.array(init_state_id), np.array(init_motion_id), \ np.array(global_id) def parse_states_from_reference_states_with_global_id(precomputed_state, global_id): # get the global id global_id = global_id % precomputed_state['global_quats_ref'].shape[0] # parse reference states from DeepMimicState global_quats_ref = precomputed_state['global_quats_ref'][global_id] ts_ref = precomputed_state['ts_ref'][global_id] vels_ref = precomputed_state['vels_ref'][global_id] avels_ref = precomputed_state['avels_ref'][global_id] return global_quats_ref, ts_ref, vels_ref, avels_ref def get_robot_states_from_torch_tensor(config, ts, global_quats, vels, avels, init_rot, progress, motion_length=-1, actions=None, relative_rot=None, motion_id=None, num_motion=None, motion_onehot_matrix=None): info = {} # the observation with quaternion-based representation torso_height = ts[..., 0, 1].cpu().numpy() gttrny, gqny, vny, avny, info['root_yaw_inv'] = \ quaternion_math.compute_observation_return_info(global_quats, ts, vels, avels) joint_obs = np.concatenate([gttrny.cpu().numpy(), gqny.cpu().numpy(), vny.cpu().numpy(), avny.cpu().numpy()], axis=-1) joint_obs = joint_obs.reshape(joint_obs.shape[0], -1) num_envs = joint_obs.shape[0] obs = np.concatenate([torso_height[:, np.newaxis], joint_obs], -1) # the previous action if config['env_action_ob']: obs = np.concatenate([obs, actions], axis=-1) # the orientation if config['env_orientation_ob']: if relative_rot is not None: obs = np.concatenate([obs, relative_rot], axis=-1) else: curr_rot = global_quats[np.arange(num_envs)][:, 0] curr_rot = curr_rot.reshape(num_envs, -1, 4) relative_rot = quaternion_math.compute_orientation_drift( init_rot, curr_rot ).cpu().numpy() obs = np.concatenate([obs, relative_rot], axis=-1) if config['env_frame_ob']: if type(motion_length) == np.ndarray: motion_length = motion_length.astype(np.float) progress_ob = 
np.expand_dims(progress.astype(np.float) / motion_length, axis=-1) else: progress_ob = np.expand_dims(progress.astype(np.float) / float(motion_length), axis=-1) obs = np.concatenate([obs, progress_ob], axis=-1) if config['env_motion_ob'] and not config['env_motion_ob_onehot']: motion_id_ob = np.expand_dims(motion_id.astype(np.float) / float(num_motion), axis=-1) obs = np.concatenate([obs, motion_id_ob], axis=-1) elif config['env_motion_ob'] and config['env_motion_ob_onehot']: motion_id_ob = motion_onehot_matrix[motion_id] obs = np.concatenate([obs, motion_id_ob], axis=-1) return obs, info def get_xyzoffset(start_ts, end_ts, root_yaw_inv): xyoffset = (end_ts - start_ts)[:, [0], :].reshape(1, -1, 1, 3) ryinv = root_yaw_inv.reshape(1, -1, 1, 4) calibrated_xyz_offset = quaternion_math.quat_apply(ryinv, xyoffset)[0, :, 0, :] return calibrated_xyz_offset ================================================ FILE: timechamber/utils/logger.py ================================================ # ----------------------------------------------------------------------------- # @brief: # The logger here will be called all across the project. 
class _MyFormatter(logging.Formatter):
    '''
    @brief:
        a class to make sure the format could be used
    '''

    def format(self, record):
        # Colored "[time @file:line]" prefix; WRN/ERR tags for higher levels.
        date = colored('[%(asctime)s @%(filename)s:%(lineno)d]', 'green')
        msg = '%(message)s'

        if record.levelno == logging.WARNING:
            fmt = date + ' ' + \
                colored('WRN', 'red', attrs=[]) + ' ' + msg
        elif record.levelno == logging.ERROR or \
                record.levelno == logging.CRITICAL:
            fmt = date + ' ' + \
                colored('ERR', 'red', attrs=['underline']) + ' ' + msg
        else:
            fmt = date + ' ' + msg

        if hasattr(self, '_style'):
            # Python3 compatibility: Formatter stores the fmt on a style object
            self._style._fmt = fmt
        self._fmt = fmt
        return super(self.__class__, self).format(record)


# the actual worker is the '_logger'; propagate=False keeps messages from
# reaching the root logger twice.
_logger = logging.getLogger('joint_embedding')
_logger.propagate = False
_logger.setLevel(logging.INFO)

# set the console output handler
con_handler = logging.StreamHandler(sys.stdout)
con_handler.setFormatter(_MyFormatter(datefmt='%m%d %H:%M:%S'))
_logger.addHandler(con_handler)


class GLOBAL_PATH(object):
    # Mutable holder for the current log path, shared module-wide via PATH.

    def __init__(self, path=None):
        if path is None:
            path = os.getcwd()
        self.path = path

    def _set_path(self, path):
        self.path = path

    def _get_path(self):
        return self.path


PATH = GLOBAL_PATH()


def set_file_handler(path=None, prefix='', time_str=''):
    """Attach a file handler (and tensorboard_logger) under a per-run log dir.

    The log directory name is derived from prefix+time; if `path` is None it
    is placed two levels above the __main__ module, else under `path`/log.
    Returns the path that was configured.
    """
    # set the file output handler
    if time_str == '':
        file_name = prefix + \
            datetime.datetime.now().strftime("%A_%d_%B_%Y_%I:%M%p") + '.log'
    else:
        file_name = prefix + time_str + '.log'

    if path is None:
        mod = sys.modules['__main__']
        path = os.path.join(os.path.abspath(mod.__file__), '..', '..', 'log')
    else:
        path = os.path.join(path, 'log')
    path = os.path.abspath(path)

    # NOTE(review): the '.log'-suffixed name is used as a *directory* here
    # (created by makedirs, then file_name is joined onto it again below) —
    # looks intentional (one directory per run) but confirm.
    path = os.path.join(path, file_name)
    if not os.path.exists(path):
        os.makedirs(path)
    PATH._set_path(path)
    path = os.path.join(path, file_name)

    # tensorboard_logger writes its event files next to the text log
    from tensorboard_logger import configure
    configure(path)

    file_handler = logging.FileHandler(
        filename=os.path.join(path, 'logger'), encoding='utf-8', mode='w')
    file_handler.setFormatter(_MyFormatter(datefmt='%m%d %H:%M:%S'))
    _logger.addHandler(file_handler)

    _logger.info('Log file set to {}'.format(path))
    return path


def _get_path():
    # Current log path as recorded by the last set_file_handler call.
    return PATH._get_path()


_LOGGING_METHOD = ['info', 'warning', 'error', 'critical',
                   'warn', 'exception', 'debug']

# export logger functions: makes e.g. logger.info(...) resolve to
# _logger.info at module level.
for func in _LOGGING_METHOD:
    locals()[func] = getattr(_logger, func)
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np import os import yaml from timechamber.tasks.ase_humanoid_base.poselib.poselib.skeleton.skeleton3d import SkeletonMotion from timechamber.tasks.ase_humanoid_base.poselib.poselib.core.rotation3d import * from isaacgym.torch_utils import * from utils import torch_utils import torch USE_CACHE = True print("MOVING MOTION DATA TO GPU, USING CACHE:", USE_CACHE) if not USE_CACHE: old_numpy = torch.Tensor.numpy class Patch: def numpy(self): if self.is_cuda: return self.to("cpu").numpy() else: return old_numpy(self) torch.Tensor.numpy = Patch.numpy class DeviceCache: def __init__(self, obj, device): self.obj = obj self.device = device keys = dir(obj) num_added = 0 for k in keys: try: out = getattr(obj, k) except: print("Error for key=", k) continue if isinstance(out, torch.Tensor): if out.is_floating_point(): out = out.to(self.device, dtype=torch.float32) else: out.to(self.device) setattr(self, k, out) num_added += 1 elif isinstance(out, np.ndarray): out = torch.tensor(out) if out.is_floating_point(): out = out.to(self.device, dtype=torch.float32) else: out.to(self.device) setattr(self, k, out) num_added += 1 print("Total added", num_added) def __getattr__(self, string): out = getattr(self.obj, string) return out class MotionLib(): def __init__(self, motion_file, dof_body_ids, dof_offsets, key_body_ids, device): self._dof_body_ids = dof_body_ids self._dof_offsets = dof_offsets self._num_dof = dof_offsets[-1] self._key_body_ids = 
class MotionLib():
    """Loads a set of reference motions and serves blended motion states.

    All motions' frames are concatenated along dim 0 into flat tensors
    (gts/grs/lrs/...); `length_starts[m]` is motion m's first flat index.
    """

    def __init__(self, motion_file, dof_body_ids, dof_offsets, key_body_ids, device):
        # dof_offsets[j]..dof_offsets[j+1] is body j's slice of the DOF vector;
        # the last offset is therefore the total DOF count.
        self._dof_body_ids = dof_body_ids
        self._dof_offsets = dof_offsets
        self._num_dof = dof_offsets[-1]
        self._key_body_ids = torch.tensor(key_body_ids, device=device)
        self._device = device
        self._load_motions(motion_file)

        motions = self._motions
        # Flat per-frame tensors over all motions (float32):
        self.gts = torch.cat([m.global_translation for m in motions], dim=0).float()
        self.grs = torch.cat([m.global_rotation for m in motions], dim=0).float()
        self.lrs = torch.cat([m.local_rotation for m in motions], dim=0).float()
        self.grvs = torch.cat([m.global_root_velocity for m in motions], dim=0).float()
        self.gravs = torch.cat([m.global_root_angular_velocity for m in motions], dim=0).float()
        self.dvs = torch.cat([m.dof_vels for m in motions], dim=0).float()

        # exclusive prefix sum of frame counts -> start index of each motion
        lengths = self._motion_num_frames
        lengths_shifted = lengths.roll(1)
        lengths_shifted[0] = 0
        self.length_starts = lengths_shifted.cumsum(0)

        self.motion_ids = torch.arange(len(self._motions), dtype=torch.long, device=self._device)

        return

    def num_motions(self):
        return len(self._motions)

    def get_total_length(self):
        # total duration in seconds across all motions
        return sum(self._motion_lengths)

    def get_motion(self, motion_id):
        return self._motions[motion_id]

    def sample_motions(self, n):
        # weighted sampling with replacement, weights from the motion config
        motion_ids = torch.multinomial(self._motion_weights, num_samples=n, replacement=True)

        # m = self.num_motions()
        # motion_ids = np.random.choice(m, size=n, replace=True, p=self._motion_weights)
        # motion_ids = torch.tensor(motion_ids, device=self._device, dtype=torch.long)
        return motion_ids

    def sample_time(self, motion_ids, truncate_time=None):
        """Sample a uniform-random time within each motion's duration.

        truncate_time shaves that many seconds off the end (e.g. to leave
        room for a lookahead window).
        """
        n = len(motion_ids)
        phase = torch.rand(motion_ids.shape, device=self._device)

        motion_len = self._motion_lengths[motion_ids]
        if (truncate_time is not None):
            assert (truncate_time >= 0.0)
            motion_len -= truncate_time

        motion_time = phase * motion_len
        return motion_time

    def get_motion_length(self, motion_ids):
        return self._motion_lengths[motion_ids]

    def get_motion_state(self, motion_ids, motion_times):
        """Return the interpolated motion state at the requested times.

        Each time is mapped to two neighboring frames and a blend factor;
        positions are lerped, rotations slerped.  Velocities are taken from
        the earlier frame only (not blended).
        Returns (root_pos, root_rot, dof_pos, root_vel, root_ang_vel,
        dof_vel, key_pos).
        """
        n = len(motion_ids)
        num_bodies = self._get_num_bodies()
        num_key_bodies = self._key_body_ids.shape[0]

        motion_len = self._motion_lengths[motion_ids]
        num_frames = self._motion_num_frames[motion_ids]
        dt = self._motion_dt[motion_ids]

        frame_idx0, frame_idx1, blend = self._calc_frame_blend(motion_times, motion_len, num_frames, dt)

        # offset per-motion frame indices into the flat concatenated tensors
        f0l = frame_idx0 + self.length_starts[motion_ids]
        f1l = frame_idx1 + self.length_starts[motion_ids]

        root_pos0 = self.gts[f0l, 0]
        root_pos1 = self.gts[f1l, 0]

        root_rot0 = self.grs[f0l, 0]
        root_rot1 = self.grs[f1l, 0]

        local_rot0 = self.lrs[f0l]
        local_rot1 = self.lrs[f1l]

        root_vel = self.grvs[f0l]

        root_ang_vel = self.gravs[f0l]

        key_pos0 = self.gts[f0l.unsqueeze(-1), self._key_body_ids.unsqueeze(0)]
        key_pos1 = self.gts[f1l.unsqueeze(-1), self._key_body_ids.unsqueeze(0)]

        dof_vel = self.dvs[f0l]

        # everything must already be float32 (cast on load)
        vals = [root_pos0, root_pos1, local_rot0, local_rot1, root_vel, root_ang_vel, key_pos0, key_pos1]
        for v in vals:
            assert v.dtype != torch.float64

        blend = blend.unsqueeze(-1)

        root_pos = (1.0 - blend) * root_pos0 + blend * root_pos1

        root_rot = torch_utils.slerp(root_rot0, root_rot1, blend)

        blend_exp = blend.unsqueeze(-1)
        key_pos = (1.0 - blend_exp) * key_pos0 + blend_exp * key_pos1

        local_rot = torch_utils.slerp(local_rot0, local_rot1, torch.unsqueeze(blend, axis=-1))
        dof_pos = self._local_rotation_to_dof(local_rot)

        return root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos

    def _load_motions(self, motion_file):
        """Load all motions listed by `motion_file`, precompute DOF velocities,
        and move everything onto self._device (via DeviceCache when enabled)."""
        self._motions = []
        self._motion_lengths = []
        self._motion_weights = []
        self._motion_fps = []
        self._motion_dt = []
        self._motion_num_frames = []
        self._motion_files = []

        total_len = 0.0

        motion_files, motion_weights = self._fetch_motion_files(motion_file)
        num_motion_files = len(motion_files)
        for f in range(num_motion_files):
            curr_file = motion_files[f]
            print("Loading {:d}/{:d} motion files: {:s}".format(f + 1, num_motion_files, curr_file))
            curr_motion = SkeletonMotion.from_file(curr_file)

            motion_fps = curr_motion.fps
            curr_dt = 1.0 / motion_fps

            num_frames = curr_motion.tensor.shape[0]
            # duration spans (num_frames - 1) intervals, not num_frames
            curr_len = 1.0 / motion_fps * (num_frames - 1)

            self._motion_fps.append(motion_fps)
            self._motion_dt.append(curr_dt)
            self._motion_num_frames.append(num_frames)

            curr_dof_vels = self._compute_motion_dof_vels(curr_motion)
            curr_motion.dof_vels = curr_dof_vels

            # Moving motion tensors to the GPU
            if USE_CACHE:
                curr_motion = DeviceCache(curr_motion, self._device)
            else:
                curr_motion.tensor = curr_motion.tensor.to(self._device)
                curr_motion._skeleton_tree._parent_indices = curr_motion._skeleton_tree._parent_indices.to(self._device)
                curr_motion._skeleton_tree._local_translation = curr_motion._skeleton_tree._local_translation.to(self._device)
                curr_motion._rotation = curr_motion._rotation.to(self._device)

            self._motions.append(curr_motion)
            self._motion_lengths.append(curr_len)

            curr_weight = motion_weights[f]
            self._motion_weights.append(curr_weight)
            self._motion_files.append(curr_file)

        self._motion_lengths = torch.tensor(self._motion_lengths, device=self._device, dtype=torch.float32)

        # normalize weights into a sampling distribution
        self._motion_weights = torch.tensor(self._motion_weights, dtype=torch.float32, device=self._device)
        self._motion_weights /= self._motion_weights.sum()

        self._motion_fps = torch.tensor(self._motion_fps, device=self._device, dtype=torch.float32)
        self._motion_dt = torch.tensor(self._motion_dt, device=self._device, dtype=torch.float32)
        self._motion_num_frames = torch.tensor(self._motion_num_frames, device=self._device)

        num_motions = self.num_motions()
        total_len = self.get_total_length()

        print("Loaded {:d} motions with a total length of {:.3f}s.".format(num_motions, total_len))

        return

    def _fetch_motion_files(self, motion_file):
        """Resolve motion_file into ([file paths], [weights]).

        A .yaml file lists multiple motions (paths relative to the yaml);
        any other extension is treated as a single motion with weight 1.
        """
        ext = os.path.splitext(motion_file)[1]
        if (ext == ".yaml"):
            dir_name = os.path.dirname(motion_file)
            motion_files = []
            motion_weights = []

            with open(os.path.join(os.getcwd(), motion_file), 'r') as f:
                motion_config = yaml.load(f, Loader=yaml.SafeLoader)

            motion_list = motion_config['motions']
            for motion_entry in motion_list:
                curr_file = motion_entry['file']
                curr_weight = motion_entry['weight']
                assert (curr_weight >= 0)

                curr_file = os.path.join(dir_name, curr_file)
                motion_weights.append(curr_weight)
                motion_files.append(curr_file)
        else:
            motion_files = [motion_file]
            motion_weights = [1.0]

        return motion_files, motion_weights

    def _calc_frame_blend(self, time, len, num_frames, dt):
        # Map a time (seconds) to bracketing frame indices and a blend factor.
        # NOTE: the parameter `len` shadows the builtin here.
        phase = time / len
        phase = torch.clip(phase, 0.0, 1.0)

        frame_idx0 = (phase * (num_frames - 1)).long()
        frame_idx1 = torch.min(frame_idx0 + 1, num_frames - 1)
        blend = (time - frame_idx0 * dt) / dt

        return frame_idx0, frame_idx1, blend

    def _get_num_bodies(self):
        motion = self.get_motion(0)
        num_bodies = motion.num_joints
        return num_bodies

    def _compute_motion_dof_vels(self, motion):
        # Finite-difference DOF velocities per frame; the last frame reuses
        # the previous one so the count matches num_frames.
        num_frames = motion.tensor.shape[0]
        dt = 1.0 / motion.fps
        dof_vels = []

        for f in range(num_frames - 1):
            local_rot0 = motion.local_rotation[f]
            local_rot1 = motion.local_rotation[f + 1]
            frame_dof_vel = self._local_rotation_to_dof_vel(local_rot0, local_rot1, dt)
            frame_dof_vel = frame_dof_vel
            dof_vels.append(frame_dof_vel)

        dof_vels.append(dof_vels[-1])
        dof_vels = torch.stack(dof_vels, dim=0)

        return dof_vels

    def _local_rotation_to_dof(self, local_rot):
        """Convert per-body local rotations to the DOF position vector.

        3-DOF joints use an exponential-map encoding; 1-DOF joints are
        assumed to rotate about their local y axis.
        """
        body_ids = self._dof_body_ids
        dof_offsets = self._dof_offsets

        n = local_rot.shape[0]
        dof_pos = torch.zeros((n, self._num_dof), dtype=torch.float, device=self._device)

        for j in range(len(body_ids)):
            body_id = body_ids[j]
            joint_offset = dof_offsets[j]
            joint_size = dof_offsets[j + 1] - joint_offset

            if (joint_size == 3):
                joint_q = local_rot[:, body_id]
                joint_exp_map = torch_utils.quat_to_exp_map(joint_q)
                dof_pos[:, joint_offset:(joint_offset + joint_size)] = joint_exp_map
            elif (joint_size == 1):
                joint_q = local_rot[:, body_id]
                joint_theta, joint_axis = torch_utils.quat_to_angle_axis(joint_q)
                joint_theta = joint_theta * joint_axis[..., 1]  # assume joint is always along y axis

                joint_theta = normalize_angle(joint_theta)
                dof_pos[:, joint_offset] = joint_theta
            else:
                print("Unsupported joint type")
                assert (False)

        return dof_pos

    def _local_rotation_to_dof_vel(self, local_rot0, local_rot1, dt):
        """Angular DOF velocity between two consecutive local-rotation frames."""
        body_ids = self._dof_body_ids
        dof_offsets = self._dof_offsets

        dof_vel = torch.zeros([self._num_dof], device=self._device)

        # relative rotation frame0 -> frame1, as angle*axis / dt
        diff_quat_data = quat_mul_norm(quat_inverse(local_rot0), local_rot1)
        diff_angle, diff_axis = quat_angle_axis(diff_quat_data)
        local_vel = diff_axis * diff_angle.unsqueeze(-1) / dt
        local_vel = local_vel

        for j in range(len(body_ids)):
            body_id = body_ids[j]
            joint_offset = dof_offsets[j]
            joint_size = dof_offsets[j + 1] - joint_offset

            if (joint_size == 3):
                joint_vel = local_vel[body_id]
                dof_vel[joint_offset:(joint_offset + joint_size)] = joint_vel
            elif (joint_size == 1):
                assert (joint_size == 1)
                joint_vel = local_vel[body_id]
                dof_vel[joint_offset] = joint_vel[1]  # assume joint is always along y axis
            else:
                print("Unsupported joint type")
                assert (False)

        return dof_vel
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from omegaconf import DictConfig, OmegaConf from typing import Dict def omegaconf_to_dict(d: DictConfig)->Dict: """Converts an omegaconf DictConfig to a python Dict, respecting variable interpolation.""" ret = {} for k, v in d.items(): if isinstance(v, DictConfig): ret[k] = omegaconf_to_dict(v) else: ret[k] = v return ret def print_dict(val, nesting: int = -4, start: bool = True): """Outputs a nested dictionory.""" if type(val) == dict: if not start: print('') nesting += 4 for k in val: print(nesting * ' ', end='') print(k, end=': ') print_dict(val[k], nesting, start=False) else: print(val) # EOF ================================================ FILE: timechamber/utils/rlgames_utils.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. 
def get_rlgames_env_creator(
        # used to create the vec task
        seed: int,
        cfg: DictConfig,
        task_config: dict,
        task_name: str,
        sim_device: str,
        rl_device: str,
        graphics_device_id: int,
        headless: bool,
        device_type: str = "cuda",
        # Used to handle multi-gpu case
        multi_gpu: bool = False,
        post_create_hook: Callable = None,
        virtual_screen_capture: bool = False,
        force_render: bool = False,
):
    """Parses the configuration parameters for the environment task and creates a VecTask

    Args:
        task_config: environment configuration.
        task_name: Name of the task, used to evaluate based on the imported name (eg 'Trifinger')
        sim_device: The type of env device, eg 'cuda:0'
        rl_device: Device that RL will be done on, eg 'cuda:0'
        graphics_device_id: Graphics device ID.
        headless: Whether to run in headless mode.
        multi_gpu: Whether to use multi gpu
        post_create_hook: Hooks to be called after environment creation.
            [Needed to setup WandB only for one of the RL Games instances when doing multiple GPUs]
        virtual_screen_capture: Set to True to allow the users get captured screen in RGB array
            via `env.render(mode='rgb_array')`.
        force_render: Set to True to always force rendering in the steps
            (if the `control_freq_inv` is greater than 1 we suggest stting this arg to True)
    Returns:
        A VecTaskPython object.
    """

    def create_rlgpu_env():
        """
        Creates the task from configurations and wraps it using RL-games wrappers if required.
        """
        # create native task and pass custom config
        # MA_Humanoid_Strike uses the older AMP-style task constructor
        # (sim_params/physics_engine); all other tasks use the newer signature.
        if task_name == "MA_Humanoid_Strike":
            sim_params = parse_sim_params(cfg, task_config)
            # NOTE(review): if cfg.physics_engine is neither "physx" nor "flex",
            # `physics_engine` is never bound and the call below raises
            # NameError — confirm the config guarantees one of the two.
            if cfg.physics_engine == "physx":
                physics_engine = gymapi.SIM_PHYSX
            elif cfg.physics_engine == "flex":
                physics_engine = gymapi.SIM_FLEX
            task = isaacgym_task_map[task_name](
                cfg=task_config,
                sim_params=sim_params,
                physics_engine=physics_engine,
                device_type=device_type,
                device_id=graphics_device_id,
                headless=headless
            )
            # AMP=True enables the AMP-specific observation plumbing in the wrapper
            env = VecTaskPythonWrapper(task, rl_device,
                                       task_config.get("clip_observations", np.inf),
                                       task_config.get("clip_actions", 1.0),
                                       AMP=True)
        else:
            task = isaacgym_task_map[task_name](
                cfg=task_config,
                rl_device=rl_device,
                sim_device=sim_device,
                graphics_device_id=graphics_device_id,
                headless=headless,
                virtual_screen_capture=virtual_screen_capture,
                force_render=force_render,
            )
            env = VecTaskPythonWrapper(task, rl_device,
                                       task_config.get("clip_observations", np.inf),
                                       task_config.get("clip_actions", 1.0))

        if post_create_hook is not None:
            post_create_hook()

        return env

    return create_rlgpu_env
RLGPUAlgoObserver(AlgoObserver): """Allows us to log stats from the env along with the algorithm running stats. """ def __init__(self): pass def after_init(self, algo): self.algo = algo self.mean_scores = torch_ext.AverageMeter(1, self.algo.games_to_track).to(self.algo.ppo_device) self.ep_infos = [] self.direct_info = {} self.writer = self.algo.writer def process_infos(self, infos, done_indices): assert isinstance(infos, dict), "RLGPUAlgoObserver expects dict info" if isinstance(infos, dict): if 'episode' in infos: self.ep_infos.append(infos['episode']) if len(infos) > 0 and isinstance(infos, dict): # allow direct logging from env self.direct_info = {} for k, v in infos.items(): # only log scalars if isinstance(v, float) or isinstance(v, int) or (isinstance(v, torch.Tensor) and len(v.shape) == 0): self.direct_info[k] = v def after_clear_stats(self): self.mean_scores.clear() def after_print_stats(self, frame, epoch_num, total_time): if self.ep_infos: for key in self.ep_infos[0]: infotensor = torch.tensor([], device=self.algo.device) for ep_info in self.ep_infos: # handle scalar and zero dimensional tensor infos if not isinstance(ep_info[key], torch.Tensor): ep_info[key] = torch.Tensor([ep_info[key]]) if len(ep_info[key].shape) == 0: ep_info[key] = ep_info[key].unsqueeze(0) infotensor = torch.cat((infotensor, ep_info[key].to(self.algo.device))) value = torch.mean(infotensor) self.writer.add_scalar('Episode/' + key, value, epoch_num) self.ep_infos.clear() for k, v in self.direct_info.items(): self.writer.add_scalar(f'{k}/frame', v, frame) self.writer.add_scalar(f'{k}/iter', v, epoch_num) self.writer.add_scalar(f'{k}/time', v, total_time) if self.mean_scores.current_size > 0: mean_scores = self.mean_scores.get_mean() self.writer.add_scalar('scores/mean', mean_scores, frame) self.writer.add_scalar('scores/iter', mean_scores, epoch_num) self.writer.add_scalar('scores/time', mean_scores, total_time) class RLGPUEnv(vecenv.IVecEnv): def __init__(self, config_name, 
num_actors, **kwargs): self.env = env_configurations.configurations[config_name]['env_creator'](**kwargs) self.use_global_obs = (self.env.num_states > 0) self.full_state = {} self.full_state["obs"] = self.reset() if self.use_global_obs: self.full_state["states"] = self.env.get_state() return def step(self, action): next_obs, reward, is_done, info = self.env.step(action) # todo: improve, return only dictinary self.full_state["obs"] = next_obs if self.use_global_obs: self.full_state["states"] = self.env.get_state() return self.full_state, reward, is_done, info def reset(self, env_ids=None): self.full_state["obs"] = self.env.reset(env_ids) if self.use_global_obs: self.full_state["states"] = self.env.get_state() return self.full_state def get_number_of_agents(self): return self.env.get_number_of_agents() def get_env_info(self): info = {} info['action_space'] = self.env.action_space info['observation_space'] = self.env.observation_space info['amp_observation_space'] = self.env.amp_observation_space if self.use_global_obs: info['state_space'] = self.env.state_space print(info['action_space'], info['observation_space'], info['state_space']) else: print(info['action_space'], info['observation_space']) return info ================================================ FILE: timechamber/utils/torch_jit_utils.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. 
Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import torch import numpy as np from isaacgym.torch_utils import * @torch.jit.script def compute_heading_and_up( torso_rotation, inv_start_rot, to_target, vec0, vec1, up_idx ): # type: (Tensor, Tensor, Tensor, Tensor, Tensor, int) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor] num_envs = torso_rotation.shape[0] target_dirs = normalize(to_target) torso_quat = quat_mul(torso_rotation, inv_start_rot) up_vec = get_basis_vector(torso_quat, vec1).view(num_envs, 3) heading_vec = get_basis_vector(torso_quat, vec0).view(num_envs, 3) up_proj = up_vec[:, up_idx] heading_proj = torch.bmm(heading_vec.view( num_envs, 1, 3), target_dirs.view(num_envs, 3, 1)).view(num_envs) return torso_quat, up_proj, heading_proj, up_vec, heading_vec @torch.jit.script def compute_rot(torso_quat, velocity, ang_velocity, targets, torso_positions): vel_loc = quat_rotate_inverse(torso_quat, velocity) angvel_loc = quat_rotate_inverse(torso_quat, ang_velocity) roll, pitch, yaw = get_euler_xyz(torso_quat) walk_target_angle = torch.atan2(targets[:, 2] 
- torso_positions[:, 2], targets[:, 0] - torso_positions[:, 0]) angle_to_target = walk_target_angle - yaw return vel_loc, angvel_loc, roll, pitch, yaw, angle_to_target @torch.jit.script def quat_axis(q, axis=0): # type: (Tensor, int) -> Tensor basis_vec = torch.zeros(q.shape[0], 3, device=q.device) basis_vec[:, axis] = 1 return quat_rotate(q, basis_vec) """ Normalization and Denormalization of Tensors """ @torch.jit.script def scale_transform(x: torch.Tensor, lower: torch.Tensor, upper: torch.Tensor) -> torch.Tensor: """ Normalizes a given input tensor to a range of [-1, 1]. @note It uses pytorch broadcasting functionality to deal with batched input. Args: x: Input tensor of shape (N, dims). lower: The minimum value of the tensor. Shape (dims,) upper: The maximum value of the tensor. Shape (dims,) Returns: Normalized transform of the tensor. Shape (N, dims) """ # default value of center offset = (lower + upper) * 0.5 # return normalized tensor return 2 * (x - offset) / (upper - lower) @torch.jit.script def unscale_transform(x: torch.Tensor, lower: torch.Tensor, upper: torch.Tensor) -> torch.Tensor: """ Denormalizes a given input tensor from range of [-1, 1] to (lower, upper). @note It uses pytorch broadcasting functionality to deal with batched input. Args: x: Input tensor of shape (N, dims). lower: The minimum value of the tensor. Shape (dims,) upper: The maximum value of the tensor. Shape (dims,) Returns: Denormalized transform of the tensor. Shape (N, dims) """ # default value of center offset = (lower + upper) * 0.5 # return normalized tensor return x * (upper - lower) * 0.5 + offset @torch.jit.script def saturate(x: torch.Tensor, lower: torch.Tensor, upper: torch.Tensor) -> torch.Tensor: """ Clamps a given input tensor to (lower, upper). @note It uses pytorch broadcasting functionality to deal with batched input. Args: x: Input tensor of shape (N, dims). lower: The minimum value of the tensor. Shape (dims,) upper: The maximum value of the tensor. 
Shape (dims,) Returns: Clamped transform of the tensor. Shape (N, dims) """ return torch.max(torch.min(x, upper), lower) """ Rotation conversions """ @torch.jit.script def quat_diff_rad(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: """ Get the difference in radians between two quaternions. Args: a: first quaternion, shape (N, 4) b: second quaternion, shape (N, 4) Returns: Difference in radians, shape (N,) """ b_conj = quat_conjugate(b) mul = quat_mul(a, b_conj) # 2 * torch.acos(torch.abs(mul[:, -1])) return 2.0 * torch.asin( torch.clamp( torch.norm( mul[:, 0:3], p=2, dim=-1), max=1.0) ) @torch.jit.script def local_to_world_space(pos_offset_local: torch.Tensor, pose_global: torch.Tensor): """ Convert a point from the local frame to the global frame Args: pos_offset_local: Point in local frame. Shape: [N, 3] pose_global: The spatial pose of this point. Shape: [N, 7] Returns: Position in the global frame. Shape: [N, 3] """ quat_pos_local = torch.cat( [pos_offset_local, torch.zeros(pos_offset_local.shape[0], 1, dtype=torch.float32, device=pos_offset_local.device)], dim=-1 ) quat_global = pose_global[:, 3:7] quat_global_conj = quat_conjugate(quat_global) pos_offset_global = quat_mul(quat_global, quat_mul(quat_pos_local, quat_global_conj))[:, 0:3] result_pos_gloal = pos_offset_global + pose_global[:, 0:3] return result_pos_gloal # NB: do not make this function jit, since it is passed around as an argument. def normalise_quat_in_pose(pose): """Takes a pose and normalises the quaternion portion of it. Args: pose: shape N, 7 Returns: Pose with normalised quat. 
Shape N, 7 """ pos = pose[:, 0:3] quat = pose[:, 3:7] quat /= torch.norm(quat, dim=-1, p=2).reshape(-1, 1) return torch.cat([pos, quat], dim=-1) @torch.jit.script def my_quat_rotate(q, v): shape = q.shape q_w = q[:, -1] q_vec = q[:, :3] a = v * (2.0 * q_w ** 2 - 1.0).unsqueeze(-1) b = torch.cross(q_vec, v, dim=-1) * q_w.unsqueeze(-1) * 2.0 c = q_vec * \ torch.bmm(q_vec.view(shape[0], 1, 3), v.view( shape[0], 3, 1)).squeeze(-1) * 2.0 return a + b + c @torch.jit.script def quat_to_angle_axis(q): # type: (Tensor) -> Tuple[Tensor, Tensor] # computes axis-angle representation from quaternion q # q must be normalized min_theta = 1e-5 qx, qy, qz, qw = 0, 1, 2, 3 sin_theta = torch.sqrt(1 - q[..., qw] * q[..., qw]) angle = 2 * torch.acos(q[..., qw]) angle = normalize_angle(angle) sin_theta_expand = sin_theta.unsqueeze(-1) axis = q[..., qx:qw] / sin_theta_expand mask = sin_theta > min_theta default_axis = torch.zeros_like(axis) default_axis[..., -1] = 1 angle = torch.where(mask, angle, torch.zeros_like(angle)) mask_expand = mask.unsqueeze(-1) axis = torch.where(mask_expand, axis, default_axis) return angle, axis @torch.jit.script def angle_axis_to_exp_map(angle, axis): # type: (Tensor, Tensor) -> Tensor # compute exponential map from axis-angle angle_expand = angle.unsqueeze(-1) exp_map = angle_expand * axis return exp_map @torch.jit.script def quat_to_exp_map(q): # type: (Tensor) -> Tensor # compute exponential map from quaternion # q must be normalized angle, axis = quat_to_angle_axis(q) exp_map = angle_axis_to_exp_map(angle, axis) return exp_map @torch.jit.script def quat_to_tan_norm(q): # type: (Tensor) -> Tensor # represents a rotation using the tangent and normal vectors ref_tan = torch.zeros_like(q[..., 0:3]) ref_tan[..., 0] = 1 tan = my_quat_rotate(q, ref_tan) ref_norm = torch.zeros_like(q[..., 0:3]) ref_norm[..., -1] = 1 norm = my_quat_rotate(q, ref_norm) norm_tan = torch.cat([tan, norm], dim=len(tan.shape) - 1) return norm_tan @torch.jit.script def 
euler_xyz_to_exp_map(roll, pitch, yaw): # type: (Tensor, Tensor, Tensor) -> Tensor q = quat_from_euler_xyz(roll, pitch, yaw) exp_map = quat_to_exp_map(q) return exp_map @torch.jit.script def exp_map_to_angle_axis(exp_map): min_theta = 1e-5 angle = torch.norm(exp_map, dim=-1) angle_exp = torch.unsqueeze(angle, dim=-1) axis = exp_map / angle_exp angle = normalize_angle(angle) default_axis = torch.zeros_like(exp_map) default_axis[..., -1] = 1 mask = angle > min_theta angle = torch.where(mask, angle, torch.zeros_like(angle)) mask_expand = mask.unsqueeze(-1) axis = torch.where(mask_expand, axis, default_axis) return angle, axis @torch.jit.script def exp_map_to_quat(exp_map): angle, axis = exp_map_to_angle_axis(exp_map) q = quat_from_angle_axis(angle, axis) return q @torch.jit.script def slerp(q0, q1, t): # type: (Tensor, Tensor, Tensor) -> Tensor qx, qy, qz, qw = 0, 1, 2, 3 cos_half_theta = q0[..., qw] * q1[..., qw] \ + q0[..., qx] * q1[..., qx] \ + q0[..., qy] * q1[..., qy] \ + q0[..., qz] * q1[..., qz] neg_mask = cos_half_theta < 0 q1 = q1.clone() q1[neg_mask] = -q1[neg_mask] cos_half_theta = torch.abs(cos_half_theta) cos_half_theta = torch.unsqueeze(cos_half_theta, dim=-1) half_theta = torch.acos(cos_half_theta); sin_half_theta = torch.sqrt(1.0 - cos_half_theta * cos_half_theta); ratioA = torch.sin((1 - t) * half_theta) / sin_half_theta; ratioB = torch.sin(t * half_theta) / sin_half_theta; new_q_x = ratioA * q0[..., qx:qx+1] + ratioB * q1[..., qx:qx+1] new_q_y = ratioA * q0[..., qy:qy+1] + ratioB * q1[..., qy:qy+1] new_q_z = ratioA * q0[..., qz:qz+1] + ratioB * q1[..., qz:qz+1] new_q_w = ratioA * q0[..., qw:qw+1] + ratioB * q1[..., qw:qw+1] cat_dim = len(new_q_w.shape) - 1 new_q = torch.cat([new_q_x, new_q_y, new_q_z, new_q_w], dim=cat_dim) new_q = torch.where(torch.abs(sin_half_theta) < 0.001, 0.5 * q0 + 0.5 * q1, new_q) new_q = torch.where(torch.abs(cos_half_theta) >= 1, q0, new_q) return new_q @torch.jit.script def calc_heading(q): # type: (Tensor) -> Tensor # 
calculate heading direction from quaternion # the heading is the direction on the xy plane # q must be normalized ref_dir = torch.zeros_like(q[..., 0:3]) ref_dir[..., 0] = 1 rot_dir = my_quat_rotate(q, ref_dir) heading = torch.atan2(rot_dir[..., 1], rot_dir[..., 0]) return heading @torch.jit.script def calc_heading_quat(q): # type: (Tensor) -> Tensor # calculate heading rotation from quaternion # the heading is the direction on the xy plane # q must be normalized heading = calc_heading(q) axis = torch.zeros_like(q[..., 0:3]) axis[..., 2] = 1 heading_q = quat_from_angle_axis(heading, axis) return heading_q @torch.jit.script def calc_heading_quat_inv(q): # type: (Tensor) -> Tensor # calculate heading rotation from quaternion # the heading is the direction on the xy plane # q must be normalized heading = calc_heading(q) axis = torch.zeros_like(q[..., 0:3]) axis[..., 2] = 1 heading_q = quat_from_angle_axis(-heading, axis) return heading_q # EOF ================================================ FILE: timechamber/utils/torch_utils.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import torch import numpy as np from isaacgym.torch_utils import * @torch.jit.script def quat_to_angle_axis(q): # type: (Tensor) -> Tuple[Tensor, Tensor] # computes axis-angle representation from quaternion q # q must be normalized min_theta = 1e-5 qx, qy, qz, qw = 0, 1, 2, 3 sin_theta = torch.sqrt(1 - q[..., qw] * q[..., qw]) angle = 2 * torch.acos(q[..., qw]) angle = normalize_angle(angle) sin_theta_expand = sin_theta.unsqueeze(-1) axis = q[..., qx:qw] / sin_theta_expand mask = torch.abs(sin_theta) > min_theta default_axis = torch.zeros_like(axis) default_axis[..., -1] = 1 angle = torch.where(mask, angle, torch.zeros_like(angle)) mask_expand = mask.unsqueeze(-1) axis = torch.where(mask_expand, axis, default_axis) return angle, axis @torch.jit.script def angle_axis_to_exp_map(angle, axis): # type: (Tensor, Tensor) -> Tensor # compute exponential map from axis-angle angle_expand = angle.unsqueeze(-1) exp_map = angle_expand * axis return exp_map @torch.jit.script def quat_to_exp_map(q): # type: (Tensor) -> Tensor # compute exponential map from quaternion # q must be normalized angle, axis = quat_to_angle_axis(q) exp_map = angle_axis_to_exp_map(angle, axis) return exp_map @torch.jit.script def 
quat_to_tan_norm(q): # type: (Tensor) -> Tensor # represents a rotation using the tangent and normal vectors ref_tan = torch.zeros_like(q[..., 0:3]) ref_tan[..., 0] = 1 tan = quat_rotate(q, ref_tan) ref_norm = torch.zeros_like(q[..., 0:3]) ref_norm[..., -1] = 1 norm = quat_rotate(q, ref_norm) norm_tan = torch.cat([tan, norm], dim=len(tan.shape) - 1) return norm_tan @torch.jit.script def euler_xyz_to_exp_map(roll, pitch, yaw): # type: (Tensor, Tensor, Tensor) -> Tensor q = quat_from_euler_xyz(roll, pitch, yaw) exp_map = quat_to_exp_map(q) return exp_map @torch.jit.script def exp_map_to_angle_axis(exp_map): min_theta = 1e-5 angle = torch.norm(exp_map, dim=-1) angle_exp = torch.unsqueeze(angle, dim=-1) axis = exp_map / angle_exp angle = normalize_angle(angle) default_axis = torch.zeros_like(exp_map) default_axis[..., -1] = 1 mask = torch.abs(angle) > min_theta angle = torch.where(mask, angle, torch.zeros_like(angle)) mask_expand = mask.unsqueeze(-1) axis = torch.where(mask_expand, axis, default_axis) return angle, axis @torch.jit.script def exp_map_to_quat(exp_map): angle, axis = exp_map_to_angle_axis(exp_map) q = quat_from_angle_axis(angle, axis) return q @torch.jit.script def slerp(q0, q1, t): # type: (Tensor, Tensor, Tensor) -> Tensor cos_half_theta = torch.sum(q0 * q1, dim=-1) neg_mask = cos_half_theta < 0 q1 = q1.clone() q1[neg_mask] = -q1[neg_mask] cos_half_theta = torch.abs(cos_half_theta) cos_half_theta = torch.unsqueeze(cos_half_theta, dim=-1) half_theta = torch.acos(cos_half_theta); sin_half_theta = torch.sqrt(1.0 - cos_half_theta * cos_half_theta); ratioA = torch.sin((1 - t) * half_theta) / sin_half_theta; ratioB = torch.sin(t * half_theta) / sin_half_theta; new_q = ratioA * q0 + ratioB * q1 new_q = torch.where(torch.abs(sin_half_theta) < 0.001, 0.5 * q0 + 0.5 * q1, new_q) new_q = torch.where(torch.abs(cos_half_theta) >= 1, q0, new_q) return new_q @torch.jit.script def calc_heading(q): # type: (Tensor) -> Tensor # calculate heading direction from quaternion 
# the heading is the direction on the xy plane # q must be normalized ref_dir = torch.zeros_like(q[..., 0:3]) ref_dir[..., 0] = 1 rot_dir = quat_rotate(q, ref_dir) heading = torch.atan2(rot_dir[..., 1], rot_dir[..., 0]) return heading @torch.jit.script def calc_heading_quat(q): # type: (Tensor) -> Tensor # calculate heading rotation from quaternion # the heading is the direction on the xy plane # q must be normalized heading = calc_heading(q) axis = torch.zeros_like(q[..., 0:3]) axis[..., 2] = 1 heading_q = quat_from_angle_axis(heading, axis) return heading_q @torch.jit.script def calc_heading_quat_inv(q): # type: (Tensor) -> Tensor # calculate heading rotation from quaternion # the heading is the direction on the xy plane # q must be normalized heading = calc_heading(q) axis = torch.zeros_like(q[..., 0:3]) axis[..., 2] = 1 heading_q = quat_from_angle_axis(-heading, axis) return heading_q ================================================ FILE: timechamber/utils/utils.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # python import numpy as np import torch import random import os from collections import OrderedDict import time from isaacgym import gymapi from isaacgym import gymutil def set_np_formatting(): """ formats numpy print """ np.set_printoptions(edgeitems=30, infstr='inf', linewidth=4000, nanstr='nan', precision=2, suppress=False, threshold=10000, formatter=None) def set_seed(seed, torch_deterministic=False, rank=0): """ set seed across modules """ if seed == -1 and torch_deterministic: seed = 42 + rank elif seed == -1: seed = np.random.randint(0, 10000) else: seed = seed + rank print("Setting seed: {}".format(seed)) random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) os.environ['PYTHONHASHSEED'] = str(seed) torch.cuda.manual_seed(seed) torch.cuda.manual_seed_all(seed) if torch_deterministic: # refer to https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' torch.backends.cudnn.benchmark = False torch.backends.cudnn.deterministic = True torch.use_deterministic_algorithms(True) else: torch.backends.cudnn.benchmark = True torch.backends.cudnn.deterministic = False return seed def load_check(checkpoint, normalize_input: bool, 
normalize_value: bool): extras = OrderedDict() if normalize_value and 'value_mean_std.running_mean' not in checkpoint['model'].keys(): extras['value_mean_std.running_mean'] = checkpoint['reward_mean_std']['running_mean'] extras['value_mean_std.running_var'] = checkpoint['reward_mean_std']['running_var'] extras['value_mean_std.count'] = checkpoint['reward_mean_std']['count'] if normalize_input and 'running_mean_std.running_mean' not in checkpoint['model'].keys(): extras['running_mean_std.running_mean'] = checkpoint['running_mean_std']['running_mean'] extras['running_mean_std.running_var'] = checkpoint['running_mean_std']['running_var'] extras['running_mean_std.count'] = checkpoint['running_mean_std']['count'] extras.update(checkpoint['model']) checkpoint['model'] = extras return checkpoint def safe_filesystem_op(func, *args, **kwargs): """ This is to prevent spurious crashes related to saving checkpoints or restoring from checkpoints in a Network Filesystem environment (i.e. NGC cloud or SLURM) """ num_attempts = 5 for attempt in range(num_attempts): try: return func(*args, **kwargs) except Exception as exc: print(f'Exception {exc} when trying to execute {func} with args:{args} and kwargs:{kwargs}...') wait_sec = 2 ** attempt print(f'Waiting {wait_sec} before trying again...') time.sleep(wait_sec) raise RuntimeError(f'Could not execute {func}, give up after {num_attempts} attempts...') def safe_load(filename, device=None): if device is not None: return safe_filesystem_op(torch.load, filename, map_location=device) else: return safe_filesystem_op(torch.load, filename) def load_checkpoint(filename, device=None): print("=> loading checkpoint '{}'".format(filename)) state = safe_load(filename, device=device) return state def print_actor_info(gym, env, actor_handle): name = gym.get_actor_name(env, actor_handle) body_names = gym.get_actor_rigid_body_names(env, actor_handle) body_dict = gym.get_actor_rigid_body_dict(env, actor_handle) joint_names = 
gym.get_actor_joint_names(env, actor_handle) joint_dict = gym.get_actor_joint_dict(env, actor_handle) dof_names = gym.get_actor_dof_names(env, actor_handle) dof_dict = gym.get_actor_dof_dict(env, actor_handle) print() print("===== Actor: %s =======================================" % name) print("\nBodies") print(body_names) print(body_dict) print("\nJoints") print(joint_names) print(joint_dict) print("\n Degrees Of Freedom (DOFs)") print(dof_names) print(dof_dict) print() # Get body state information body_states = gym.get_actor_rigid_body_states( env, actor_handle, gymapi.STATE_ALL) # Print some state slices print("Poses from Body State:") print(body_states['pose']) # print just the poses print("\nVelocities from Body State:") print(body_states['vel']) # print just the velocities print() # iterate through bodies and print name and position body_positions = body_states['pose']['p'] for i in range(len(body_names)): print("Body '%s' has position" % body_names[i], body_positions[i]) print("\nDOF states:") # get DOF states dof_states = gym.get_actor_dof_states(env, actor_handle, gymapi.STATE_ALL) # print some state slices # Print all states for each degree of freedom print(dof_states) print() # iterate through DOFs and print name and position dof_positions = dof_states['pos'] for i in range(len(dof_names)): print("DOF '%s' has position" % dof_names[i], dof_positions[i]) def print_asset_info(asset, name, gym): print("======== Asset info %s: ========" % (name)) num_bodies = gym.get_asset_rigid_body_count(asset) num_joints = gym.get_asset_joint_count(asset) num_dofs = gym.get_asset_dof_count(asset) print("Got %d bodies, %d joints, and %d DOFs" % (num_bodies, num_joints, num_dofs)) # Iterate through bodies print("Bodies:") for i in range(num_bodies): name = gym.get_asset_rigid_body_name(asset, i) print(" %2d: '%s'" % (i, name)) # Iterate through joints print("Joints:") for i in range(num_joints): name = gym.get_asset_joint_name(asset, i) type = 
gym.get_asset_joint_type(asset, i) type_name = gym.get_joint_type_string(type) print(" %2d: '%s' (%s)" % (i, name, type_name)) # iterate through degrees of freedom (DOFs) print("DOFs:") for i in range(num_dofs): name = gym.get_asset_dof_name(asset, i) type = gym.get_asset_dof_type(asset, i) type_name = gym.get_dof_type_string(type) print(" %2d: '%s' (%s)" % (i, name, type_name)) # EOF ================================================ FILE: timechamber/utils/vec_task.py ================================================ # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. from gym import spaces from isaacgym import gymtorch from isaacgym.torch_utils import to_torch import torch import numpy as np # VecEnv Wrapper for RL training class VecTask(): def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): self.task = task self.num_environments = task.num_envs self.num_agents = 1 # used for multi-agent environments self.num_observations = task.num_obs self.num_states = task.num_states self.num_actions = task.num_actions self.obs_space = spaces.Box(np.ones(self.num_obs) * -np.Inf, np.ones(self.num_obs) * np.Inf) self.state_space = spaces.Box(np.ones(self.num_states) * -np.Inf, np.ones(self.num_states) * np.Inf) self.act_space = spaces.Box(np.ones(self.num_actions) * -1., np.ones(self.num_actions) * 1.) 
self.clip_obs = clip_observations self.clip_actions = clip_actions self.rl_device = rl_device print("RL device: ", rl_device) def step(self, actions): raise NotImplementedError def reset(self): raise NotImplementedError def get_number_of_agents(self): return self.num_agents @property def observation_space(self): return self.obs_space @property def action_space(self): return self.act_space @property def num_envs(self): return self.num_environments @property def num_acts(self): return self.num_actions @property def num_obs(self): return self.num_observations # C++ CPU Class class VecTaskCPU(VecTask): def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0): super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions) self.sync_frame_time = sync_frame_time def step(self, actions): actions = actions.cpu().numpy() self.task.render(self.sync_frame_time) obs, rewards, resets, extras = self.task.step(np.clip(actions, -self.clip_actions, self.clip_actions)) return (to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device), to_torch(rewards, dtype=torch.float, device=self.rl_device), to_torch(resets, dtype=torch.uint8, device=self.rl_device), []) def reset(self): actions = 0.01 * (1 - 2 * np.random.rand(self.num_envs, self.num_actions)).astype('f') # step the simulator obs, rewards, resets, extras = self.task.step(actions) return to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device) # C++ GPU Class class VecTaskGPU(VecTask): def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions) self.obs_tensor = gymtorch.wrap_tensor(self.task.obs_tensor, counts=(self.task.num_envs, self.task.num_obs)) self.rewards_tensor = gymtorch.wrap_tensor(self.task.rewards_tensor, counts=(self.task.num_envs,)) 
self.resets_tensor = gymtorch.wrap_tensor(self.task.resets_tensor, counts=(self.task.num_envs,)) def step(self, actions): self.task.render(False) actions_clipped = torch.clamp(actions, -self.clip_actions, self.clip_actions) actions_tensor = gymtorch.unwrap_tensor(actions_clipped) self.task.step(actions_tensor) return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs), self.rewards_tensor, self.resets_tensor, [] def reset(self): actions = 0.01 * (1 - 2 * torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)) actions_tensor = gymtorch.unwrap_tensor(actions) # step the simulator self.task.step(actions_tensor) return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs) # Python CPU/GPU Class class VecTaskPython(VecTask): def get_state(self): return torch.clamp(self.task.states_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) def step(self, actions): actions_tensor = torch.clamp(actions, -self.clip_actions, self.clip_actions) self.task.step(actions_tensor) return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device), self.task.rew_buf.to(self.rl_device), self.task.reset_buf.to(self.rl_device), self.task.extras def reset(self): actions = 0.01 * (1 - 2 * torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)) # step the simulator self.task.step(actions) return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) ================================================ FILE: timechamber/utils/vec_task_wrappers.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. 
Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from gym import spaces

import numpy as np
import torch

from timechamber.utils.vec_task import VecTaskCPU, VecTaskGPU, VecTaskPython


class VecTaskCPUWrapper(VecTaskCPU):
    """Thin pass-through wrapper around the CPU vec-task backend."""

    def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0):
        super().__init__(task, rl_device, sync_frame_time, clip_observations, clip_actions)


class VecTaskGPUWrapper(VecTaskGPU):
    """Thin pass-through wrapper around the GPU vec-task backend."""

    def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0):
        super().__init__(task, rl_device, clip_observations, clip_actions)


class VecTaskPythonWrapper(VecTaskPython):
    """Python vec-task wrapper that optionally exposes an AMP observation space."""

    def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0, AMP=False):
        super().__init__(task, rl_device, clip_observations, clip_actions)

        if AMP:
            # AMP tasks report their discriminator-observation size; build an
            # unbounded Box space over it. Non-AMP tasks expose None.
            amp_obs_count = task.get_num_amp_obs()
            self._amp_obs_space = spaces.Box(np.ones(amp_obs_count) * -np.Inf,
                                             np.ones(amp_obs_count) * np.Inf)
        else:
            self._amp_obs_space = None

    def reset(self, env_ids=None):
        """Reset the given envs (all when None) and return clipped observations."""
        self.task.reset(env_ids)
        clipped = torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs)
        return clipped.to(self.rl_device)

    @property
    def amp_observation_space(self):
        return self._amp_obs_space

    def fetch_amp_obs_demo(self, num_samples):
        """Delegate demo-observation sampling to the underlying AMP task."""
        return self.task.fetch_amp_obs_demo(num_samples)