Repository: inspirai/TimeChamber
Branch: main
Commit: af3f3571c99a
Files: 201
Total size: 120.5 MB
Directory structure:
gitextract_rvpupy7y/
├── .gitattributes
├── .gitignore
├── LICENSE
├── LISENCE/
│ └── isaacgymenvs/
│ └── LICENSE
├── README.md
├── assets/
│ └── mjcf/
│ └── nv_ant.xml
├── docs/
│ └── environments.md
├── setup.py
└── timechamber/
├── __init__.py
├── ase/
│ ├── ase_agent.py
│ ├── ase_models.py
│ ├── ase_network_builder.py
│ ├── ase_players.py
│ ├── hrl_agent.py
│ ├── hrl_models.py
│ ├── hrl_network_builder.py
│ ├── hrl_players.py
│ └── utils/
│ ├── amp_agent.py
│ ├── amp_datasets.py
│ ├── amp_models.py
│ ├── amp_network_builder.py
│ ├── amp_players.py
│ ├── common_agent.py
│ ├── common_player.py
│ └── replay_buffer.py
├── cfg/
│ ├── config.yaml
│ ├── task/
│ │ ├── MA_Ant_Battle.yaml
│ │ ├── MA_Ant_Sumo.yaml
│ │ └── MA_Humanoid_Strike.yaml
│ └── train/
│ ├── MA_Ant_BattlePPO.yaml
│ ├── MA_Ant_SumoPPO.yaml
│ ├── MA_Humanoid_StrikeHRL.yaml
│ └── base/
│ └── ase_humanoid_hrl.yaml
├── learning/
│ ├── common_agent.py
│ ├── common_player.py
│ ├── hrl_sp_agent.py
│ ├── hrl_sp_player.py
│ ├── pfsp_player_pool.py
│ ├── ppo_sp_agent.py
│ ├── ppo_sp_player.py
│ ├── replay_buffer.py
│ ├── vectorized_models.py
│ └── vectorized_network_builder.py
├── models/
│ ├── Humanoid_Strike/
│ │ ├── policy.pth
│ │ └── policy_op.pth
│ ├── ant_battle_2agents/
│ │ └── policy.pth
│ ├── ant_battle_3agents/
│ │ └── policy.pth
│ └── ant_sumo/
│ └── policy.pth
├── tasks/
│ ├── __init__.py
│ ├── ase_humanoid_base/
│ │ ├── base_task.py
│ │ ├── humanoid.py
│ │ ├── humanoid_amp.py
│ │ ├── humanoid_amp_task.py
│ │ └── poselib/
│ │ ├── README.md
│ │ ├── data/
│ │ │ ├── 01_01_cmu.fbx
│ │ │ ├── 07_01_cmu.fbx
│ │ │ ├── 08_02_cmu.fbx
│ │ │ ├── 09_11_cmu.fbx
│ │ │ ├── 49_08_cmu.fbx
│ │ │ ├── 55_01_cmu.fbx
│ │ │ ├── amp_humanoid_tpose.npy
│ │ │ ├── cmu_tpose.npy
│ │ │ ├── configs/
│ │ │ │ ├── retarget_cmu_to_amp.json
│ │ │ │ └── retarget_sfu_to_amp.json
│ │ │ └── sfu_tpose.npy
│ │ ├── fbx_importer.py
│ │ ├── generate_amp_humanoid_tpose.py
│ │ ├── mjcf_importer.py
│ │ ├── poselib/
│ │ │ ├── __init__.py
│ │ │ ├── core/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── backend/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── abstract.py
│ │ │ │ │ └── logger.py
│ │ │ │ ├── rotation3d.py
│ │ │ │ ├── tensor_utils.py
│ │ │ │ └── tests/
│ │ │ │ ├── __init__.py
│ │ │ │ └── test_rotation.py
│ │ │ ├── skeleton/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── backend/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── fbx/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fbx_backend.py
│ │ │ │ │ └── fbx_read_wrapper.py
│ │ │ │ └── skeleton3d.py
│ │ │ └── visualization/
│ │ │ ├── __init__.py
│ │ │ ├── common.py
│ │ │ ├── core.py
│ │ │ ├── plt_plotter.py
│ │ │ ├── simple_plotter_tasks.py
│ │ │ ├── skeleton_plotter_tasks.py
│ │ │ └── tests/
│ │ │ ├── __init__.py
│ │ │ └── test_plotter.py
│ │ └── retarget_motion.py
│ ├── base/
│ │ ├── __init__.py
│ │ ├── ma_vec_task.py
│ │ └── vec_task.py
│ ├── data/
│ │ ├── assets/
│ │ │ └── mjcf/
│ │ │ └── amp_humanoid_sword_shield.xml
│ │ ├── models/
│ │ │ └── llc_reallusion_sword_shield.pth
│ │ └── motions/
│ │ └── reallusion_sword_shield/
│ │ ├── README.txt
│ │ ├── RL_Avatar_Atk_2xCombo01_Motion.npy
│ │ ├── RL_Avatar_Atk_2xCombo02_Motion.npy
│ │ ├── RL_Avatar_Atk_2xCombo03_Motion.npy
│ │ ├── RL_Avatar_Atk_2xCombo04_Motion.npy
│ │ ├── RL_Avatar_Atk_2xCombo05_Motion.npy
│ │ ├── RL_Avatar_Atk_3xCombo01_Motion.npy
│ │ ├── RL_Avatar_Atk_3xCombo02_Motion.npy
│ │ ├── RL_Avatar_Atk_3xCombo03_Motion.npy
│ │ ├── RL_Avatar_Atk_3xCombo04_Motion.npy
│ │ ├── RL_Avatar_Atk_3xCombo05_Motion.npy
│ │ ├── RL_Avatar_Atk_3xCombo06_Motion.npy
│ │ ├── RL_Avatar_Atk_3xCombo07_Motion.npy
│ │ ├── RL_Avatar_Atk_4xCombo01_Motion.npy
│ │ ├── RL_Avatar_Atk_4xCombo02_Motion.npy
│ │ ├── RL_Avatar_Atk_4xCombo03_Motion.npy
│ │ ├── RL_Avatar_Atk_Jump_Motion.npy
│ │ ├── RL_Avatar_Atk_Kick_Motion.npy
│ │ ├── RL_Avatar_Atk_ShieldCharge_Motion.npy
│ │ ├── RL_Avatar_Atk_ShieldSwipe01_Motion.npy
│ │ ├── RL_Avatar_Atk_ShieldSwipe02_Motion.npy
│ │ ├── RL_Avatar_Atk_SlashDown_Motion.npy
│ │ ├── RL_Avatar_Atk_SlashLeft_Motion.npy
│ │ ├── RL_Avatar_Atk_SlashRight_Motion.npy
│ │ ├── RL_Avatar_Atk_SlashUp_Motion.npy
│ │ ├── RL_Avatar_Atk_Spin_Motion.npy
│ │ ├── RL_Avatar_Atk_Stab_Motion.npy
│ │ ├── RL_Avatar_Counter_Atk01_Motion.npy
│ │ ├── RL_Avatar_Counter_Atk02_Motion.npy
│ │ ├── RL_Avatar_Counter_Atk03_Motion.npy
│ │ ├── RL_Avatar_Counter_Atk04_Motion.npy
│ │ ├── RL_Avatar_Counter_Atk05_Motion.npy
│ │ ├── RL_Avatar_Dodge_Backward_Motion.npy
│ │ ├── RL_Avatar_Dodgle_Left_Motion.npy
│ │ ├── RL_Avatar_Dodgle_Right_Motion.npy
│ │ ├── RL_Avatar_Fall_Backward_Motion.npy
│ │ ├── RL_Avatar_Fall_Left_Motion.npy
│ │ ├── RL_Avatar_Fall_Right_Motion.npy
│ │ ├── RL_Avatar_Fall_SpinLeft_Motion.npy
│ │ ├── RL_Avatar_Fall_SpinRight_Motion.npy
│ │ ├── RL_Avatar_Idle_Alert(0)_Motion.npy
│ │ ├── RL_Avatar_Idle_Alert_Motion.npy
│ │ ├── RL_Avatar_Idle_Battle(0)_Motion.npy
│ │ ├── RL_Avatar_Idle_Battle_Motion.npy
│ │ ├── RL_Avatar_Idle_Ready(0)_Motion.npy
│ │ ├── RL_Avatar_Idle_Ready_Motion.npy
│ │ ├── RL_Avatar_Kill_2xCombo01_Motion.npy
│ │ ├── RL_Avatar_Kill_2xCombo02_Motion.npy
│ │ ├── RL_Avatar_Kill_3xCombo01_Motion.npy
│ │ ├── RL_Avatar_Kill_3xCombo02_Motion.npy
│ │ ├── RL_Avatar_Kill_4xCombo01_Motion.npy
│ │ ├── RL_Avatar_RunBackward_Motion.npy
│ │ ├── RL_Avatar_RunForward_Motion.npy
│ │ ├── RL_Avatar_RunLeft_Motion.npy
│ │ ├── RL_Avatar_RunRight_Motion.npy
│ │ ├── RL_Avatar_Shield_BlockBackward_Motion.npy
│ │ ├── RL_Avatar_Shield_BlockCrouch_Motion.npy
│ │ ├── RL_Avatar_Shield_BlockDown_Motion.npy
│ │ ├── RL_Avatar_Shield_BlockLeft_Motion.npy
│ │ ├── RL_Avatar_Shield_BlockRight_Motion.npy
│ │ ├── RL_Avatar_Shield_BlockUp_Motion.npy
│ │ ├── RL_Avatar_Standoff_Circle_Motion.npy
│ │ ├── RL_Avatar_Standoff_Feint_Motion.npy
│ │ ├── RL_Avatar_Standoff_Swing_Motion.npy
│ │ ├── RL_Avatar_Sword_ParryBackward01_Motion.npy
│ │ ├── RL_Avatar_Sword_ParryBackward02_Motion.npy
│ │ ├── RL_Avatar_Sword_ParryBackward03_Motion.npy
│ │ ├── RL_Avatar_Sword_ParryBackward04_Motion.npy
│ │ ├── RL_Avatar_Sword_ParryCrouch_Motion.npy
│ │ ├── RL_Avatar_Sword_ParryDown_Motion.npy
│ │ ├── RL_Avatar_Sword_ParryLeft_Motion.npy
│ │ ├── RL_Avatar_Sword_ParryRight_Motion.npy
│ │ ├── RL_Avatar_Sword_ParryUp_Motion.npy
│ │ ├── RL_Avatar_Taunt_PoundChest_Motion.npy
│ │ ├── RL_Avatar_Taunt_Roar_Motion.npy
│ │ ├── RL_Avatar_Taunt_ShieldKnock_Motion.npy
│ │ ├── RL_Avatar_TurnLeft180_Motion.npy
│ │ ├── RL_Avatar_TurnLeft90_Motion.npy
│ │ ├── RL_Avatar_TurnRight180_Motion.npy
│ │ ├── RL_Avatar_TurnRight90_Motion.npy
│ │ ├── RL_Avatar_WalkBackward01_Motion.npy
│ │ ├── RL_Avatar_WalkBackward02_Motion.npy
│ │ ├── RL_Avatar_WalkForward01_Motion.npy
│ │ ├── RL_Avatar_WalkForward02_Motion.npy
│ │ ├── RL_Avatar_WalkLeft01_Motion.npy
│ │ ├── RL_Avatar_WalkLeft02_Motion.npy
│ │ ├── RL_Avatar_WalkRight01_Motion.npy
│ │ ├── RL_Avatar_WalkRight02_Motion.npy
│ │ └── dataset_reallusion_sword_shield.yaml
│ ├── ma_ant_battle.py
│ ├── ma_ant_sumo.py
│ └── ma_humanoid_strike.py
├── train.py
└── utils/
├── config.py
├── gym_util.py
├── logger.py
├── motion_lib.py
├── reformat.py
├── rlgames_utils.py
├── torch_jit_utils.py
├── torch_utils.py
├── utils.py
├── vec_task.py
└── vec_task_wrappers.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitattributes
================================================
================================================
FILE: .gitignore
================================================
videos
/timechamber/logs
*train_dir*
*ige_logs*
*.egg-info
/.vs
/.vscode
/_package
/shaders
._tmptext.txt
__pycache__/
/timechamber/tasks/__pycache__
/timechamber/utils/__pycache__
/timechamber/tasks/base/__pycache__
/tools/format/.lastrun
*.pyc
_doxygen
/rlisaacgymenvsgpu/logs
/timechamber/benchmarks/results
/timechamber/simpletests/results
*.pxd2
/tests/logs
/timechamber/balance_bot.xml
/timechamber/quadcopter.xml
/timechamber/ingenuity.xml
logs*
nn/
runs/
.idea
outputs/
*.hydra*
/timechamber/wandb
/test
.gitlab
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2022 MIT Inspir.ai
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: LISENCE/isaacgymenvs/LICENSE
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: README.md
================================================
# TimeChamber: A Massively Parallel Large Scale Self-Play Framework
****
**TimeChamber** is a large scale self-play framework running on parallel simulation.
Running self-play algorithms always need lots of hardware resources, especially on 3D physically simulated
environments.
We provide a self-play framework that can achieve fast training and evaluation with **ONLY ONE GPU**.
TimeChamber is developed with the following key features:
- **Parallel Simulation**: TimeChamber is built within [Isaac Gym](https://developer.nvidia.com/isaac-gym). Isaac Gym is
a fast GPU-based simulation platform. It supports running thousands of environments in parallel on a single GPU.For
example, on one NVIDIA Laptop RTX 3070Ti GPU, TimeChamber can reach **80,000+
mean FPS** by running 4,096 environments in parallel.
- **Parallel Evaluation**: TimeChamber can fast calculate dozens of policies' ELO
rating(represent their combat power). It also supports multi-player ELO calculations
by [multi-elo](https://github.com/djcunningham0/multielo). Inspired by Vectorization techniques
for [fast population-based training](https://github.com/instadeepai/fastpbrl), we leverage the
vectorized models to evaluate different policy in parallel.
- **Prioritized Fictitious Self-Play Benchmark**: We implement a classic PPO self-play algorithm on top
of [rl_games](https://github.com/Denys88/rl_games), with a prioritized player pool to avoid cycles and improve the
diversity of training policy.
- **Competitive Multi-Agent Tasks**: Inspired by [OpenAI RoboSumo](https://github.com/openai/robosumo) and [ASE](https://github.com/nv-tlabs/ASE), we introduce three
competitive multi-agent tasks(e.g.,Ant Sumo,Ant
Battle and Humanoid Strike) as examples.
The efficiency of our self-play framework has been tested on these tasks. After days of training,our agent can
discover some interesting
physical skills like pulling, jumping,etc. **Welcome to contribute your own environments!**
## Installation
****
Download and follow the installation instructions of Isaac Gym: https://developer.nvidia.com/isaac-gym
Ensure that Isaac Gym works on your system by running one of the examples from the `python/examples`
directory, like `joint_monkey.py`. If you have any trouble running the samples, please follow troubleshooting steps
described in the [Isaac Gym Preview Release 3/4 installation instructions](https://developer.nvidia.com/isaac-gym).
Then install this repo:
```bash
pip install -e .
```
## Quick Start
****
### Tasks
Source code for tasks can be found in `timechamber/tasks`,The detailed settings of state/action/reward are
in [here](./docs/environments.md).
More interesting tasks will come soon.
#### Humanoid Strike
Humanoid Strike is a 3D environment with two simulated humanoid physics characters. Each character is equipped with a sword and shield with 37 degrees-of-freedom.
The game will be restarted if one agent goes outside the arena. We measure how much the player damaged the opponent and how much the player was damaged by the opponent in the terminated step to determine the winner.
#### Ant Sumo
Ant Sumo is a 3D environment with simulated physics that allows pairs of ant agents to compete against each other.
To win, the agent has to push the opponent out of the ring. Every agent has 100 hp. Each step, if the agent's body
touches the ground, its hp will be reduced by 1. The agent whose hp becomes 0 will be eliminated.
#### Ant Battle
Ant Battle is an expanded environment of Ant Sumo. It supports more than two agents competing against
each other. The battle ring radius will shrink, and any agent going out of the ring will be eliminated.
### Self-Play Training
To train your policy for tasks, for example:
```bash
# run self-play training for Humanoid Strike task
python train.py task=MA_Humanoid_Strike headless=True
```
```bash
# run self-play training for Ant Sumo task
python train.py task=MA_Ant_Sumo train=MA_Ant_SumoPPO headless=True
```
```bash
# run self-play training for Ant Battle task
python train.py task=MA_Ant_Battle train=MA_Ant_BattlePPO headless=True
```
Key arguments to the training script
follow [IsaacGymEnvs Configuration and command line arguments](https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/blob/main/README.md#configuration-and-command-line-arguments)
.
Other training arguments follow [rl_games config parameters](https://github.com/Denys88/rl_games#config-parameters),
you can change them in `timechamber/tasks/train/*.yaml`. There are some specific arguments for self-play training:
- `num_agents`: Set the number of agents for Ant Battle environment, it should be larger than 1.
- `op_checkpoint`: Set to path to the checkpoint to load initial opponent agent policy.
If it's empty, opponent agent will use random policy.
- `update_win_rate`: Win_rate threshold to add the current policy to opponent's player pool.
- `player_pool_length`: The max size of player pool, following FIFO rules.
- `games_to_check`: Warm up for training, the player pool won't be updated until the current policy plays such number of
games.
- `max_update_steps`: If current policy update iterations exceed that number, the current policy will be added to
opponent player_pool.
### Policies Evaluation
To evaluate your policies, for example:
```bash
# run testing for Ant Sumo policy
python train.py task=MA_Ant_Sumo train=MA_Ant_SumoPPO test=True num_envs=4 minibatch_size=32 headless=False checkpoint='models/ant_sumo/policy.pth'
```
```bash
# run testing for Humanoid Strike policy
python train.py task=MA_Humanoid_Strike train=MA_Humanoid_StrikeHRL test=True num_envs=4 minibatch_size=32 headless=False checkpoint='models/Humanoid_Strike/policy.pth' op_checkpoint='models/Humanoid_Strike/policy_op.pth'
```
You can set the opponent agent policy using `op_checkpoint`. If it's empty, the opponent agent will use the same policy
as `checkpoint`.
We use vectorized models to accelerate the evaluation of policies. Put policies into checkpoint dir, let them compete
with each
other in parallel:
```bash
# run testing for Ant Sumo policy
python train.py task=MA_Ant_Sumo train=MA_Ant_SumoPPO test=True headless=True checkpoint='models/ant_sumo' player_pool_type=vectorized
```
There are some specific arguments for self-play evaluation; you can change them in `timechamber/cfg/train/*.yaml`:
- `games_num`: Total episode number of evaluation.
- `record_elo`: Set `True` to record the ELO rating of your policies, after evaluation, you can check the `elo.jpg` in
your checkpoint dir.
- `init_elo`: Initial ELO rating of each policy.
### Building Your Own Task
You can build your own task
follow [IsaacGymEnvs](https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/blob/main/README.md#creating-an-environment)
, make sure the obs shape is correct and `info` contains `win`, `lose` and `draw`:
```python
import isaacgym
import timechamber
import torch
envs = timechamber.make(
seed=0,
task="MA_Ant_Sumo",
num_envs=2,
sim_device="cuda:0",
rl_device="cuda:0",
)
# the obs shape should be (num_agents*num_envs,num_obs).
# the obs of training agent is (:num_envs,num_obs)
print("Observation space is", envs.observation_space)
print("Action space is", envs.action_space)
obs = envs.reset()
for _ in range(20):
obs, reward, done, info = envs.step(
torch.rand((2 * 2,) + envs.action_space.shape, device="cuda:0")
)
# info:
# {'win': tensor([Bool, Bool])
# 'lose': tensor([Bool, Bool])
# 'draw': tensor([Bool, Bool])}
```
## Citing
If you use timechamber in your research please use the following citation:
````
@misc{InspirAI,
author = {Huang Ziming, Ziyi Liu, Wu Yutong, Flood Sung},
title = {TimeChamber: A Massively Parallel Large Scale Self-Play Framework},
year = {2022},
publisher = {GitHub},
journal = {GitHub repository},
howpublished = {\url{https://github.com/inspirai/TimeChamber}},
}
````
================================================
FILE: assets/mjcf/nv_ant.xml
================================================
================================================
FILE: docs/environments.md
================================================
## Environments
We provide a detailed description of the environment here.
### Humanoid Strike
Humanoid Strike is a 3D environment with two simulated humanoid physics characters. Each character is equipped with a sword and shield with 37 degrees-of-freedom.
The game will be restarted if one agent goes outside the arena or the game reaches the maximum episode steps. We measure how much the player damaged the opponent and how much the player was damaged by the opponent in the terminated step to determine the winner.
#### Low-Level Observation Space
| Index | Description |
|:-------:|:------------------------------:|
| 0 | Height of the root from the ground. |
| 1 - 48 | Position of the body in the character’s local coordinate frame. |
| 49 - 150 | Rotation of the body in the character’s local coordinate frame. |
| 151 - 201 | Linear velocity of the root in the character’s local coordinate frame. |
| 202 - 252 | angular velocity of the root in the character’s local coordinate frame. |
#### High-Level Observation Space
| Index | Description |
|:-------:|:------------------------------:|
| 0 - 1 | relative distance from the borderline |
| 2 - 4 | relative distance from the opponent |
| 5 - 10 | Rotation of the opponent's root in the character’s local coordinate frame. |
| 11 - 13 | Linear velocity of the opponent'root in the character’s local coordinate frame. |
| 14 - 16 | angular velocity of the opponent'root in the character’s local coordinate frame. |
| 17 - 19 | relative distance between ego agent and opponent's sword |
| 20 - 22 | Linear velocity of the opponent' sword in the character’s local coordinate frame. |
| 23 - 25 | relative distance between ego agent' shield and opponent's sword |
| 26 - 28 | relative velocity between ego agent' shield and opponent's sword |
| 29 - 31 | relative distance between ego agent' sword and opponent's torso |
| 32 - 34 | relative velocity between ego agent' sword and opponent's torso |
| 35 - 37 | relative distance between ego agent' sword and opponent's head |
| 38 - 40 | relative velocity between ego agent' sword and opponent's head |
| 41 - 43 | relative distance between ego agent' sword and opponent's right arm |
| 44 - 46 | relative distance between ego agent' sword and opponent's right thigh |
| 47 - 49 | relative distance between ego agent' sword and opponent's left thigh |
#### Low-Level Action Space
| Index | Description |
|:-----:|:-----------------:|
| 0 - 30 | target rotations of each character’s joints |
#### High-Level Action Space
| Index | Description |
|:-----:|:-----------------:|
| 0 - 63 | latent skill variables |
#### Rewards
The weights of reward components are as follows:
```python
op_fall_reward_w = 200.0
ego_fall_out_reward_w = 50.0
shield_to_sword_pos_reward_w = 1.0
damage_reward_w = 8.0
sword_to_op_reward_w = 0.8
reward_energy_w = 3.0
reward_strike_vel_acc_w = 3.0
reward_face_w = 4.0
reward_foot_to_op_w = 10.0
reward_kick_w = 2.0
```
### Ant Sumo
Ant Sumo is a 3D environment with simulated physics that allows pairs of ant agents to compete against each other.
To win, the agent has to push the opponent out of the ring. Every agent has 100 hp . Each step, If the agent's body
touches the ground, its hp will be reduced by 1.The agent whose hp becomes 0 will be eliminated.
#### Observation Space
| Index | Description |
|:-------:|:------------------------------:|
| 0 - 2 | self pose |
| 3 - 6 | self rotation |
| 7 - 9 | self linear velocity |
| 10 - 12 | self angle velocity |
| 13 - 20 | self dof pos |
| 21 - 28 | self dof velocity |
| 29 - 31 | opponent pose |
| 32 - 35 | opponent rotation |
| 36 - 37 | self-opponent pose vector(x,y) |
| 38 | is self body touch ground |
| 39 | is opponent body touch ground |
#### Action Space
| Index | Description |
|:-----:|:-----------------:|
| 0 - 7 | self dof position |
#### Rewards
The reward consists of two parts:sparse reward and dense reward.
```python
win_reward = 2000
lose_penalty = -2000
draw_penalty = -1000
dense_reward_scale = 1.
dof_at_limit_cost = torch.sum(obs_buf[:, 13:21] > 0.99, dim=-1) * joints_at_limit_cost_scale
push_reward = -push_scale * torch.exp(-torch.linalg.norm(obs_buf_op[:, :2], dim=-1))
action_cost_penalty = torch.sum(torch.square(torques), dim=1) * action_cost_scale
not_move_penalty = -10 * torch.exp(-torch.sum(torch.abs(torques), dim=1))
dense_reward = move_reward + dof_at_limit_cost + push_reward + action_cost_penalty + not_move_penalty
total_reward = win_reward + lose_penalty + draw_penalty + dense_reward * dense_reward_scale
```
### Ant Battle
Ant Battle is an expanded environment of Ant Sumo. It supports more than two agents competing against
each other. The battle ring radius will shrink, and any agent going out of the ring will be eliminated.
#### Observation Space
| Index | Description |
|:-------:|:--------------------------------------:|
| 0 - 2 | self pose |
| 3 - 6 | self rotation |
| 7 - 9 | self linear velocity |
| 10 - 12 | self angle velocity |
| 13 - 20 | self dof pos |
| 21 - 28 | self dof velocity |
| 29 | border radius-self dis to centre |
| 30 | border radius |
| 31 | is self body touch ground |
| 32 - 34 | opponent_1 pose |
| 35 - 38 | opponent_1 rotation |
| 39 - 40 | self-opponent_1 pose vector(x,y) |
| 41 - 48 | opponent_1 dof pose |
| 49 - 56 | opponent_1 dof velocity |
| 57 | border radius-opponent_1 dis to centre |
| 58 | is opponent_1 body touch ground |
| ... | ... |
#### Action Space
| Index | Description |
|:-----:|:-----------------:|
| 0 - 7 | self dof position |
#### Rewards
The reward consists of two parts:sparse reward and dense reward.
```python
win_reward_scale = 2000
reward_per_rank = 2 * win_reward_scale / (num_agents - 1)
sparse_reward = sparse_reward * (win_reward_scale - (nxt_rank[:, 0] - 1) * reward_per_rank)
stay_in_center_reward = stay_in_center_reward_scale * torch.exp(-torch.linalg.norm(obs[0, :, :2], dim=-1))
dof_at_limit_cost = torch.sum(obs[0, :, 13:21] > 0.99, dim=-1) * joints_at_limit_cost_scale
action_cost_penalty = torch.sum(torch.square(torques), dim=1) * action_cost_scale
not_move_penalty = torch.exp(-torch.sum(torch.abs(torques), dim=1))
dense_reward = dof_at_limit_cost + action_cost_penalty + not_move_penalty + stay_in_center_reward
total_reward = sparse_reward + dense_reward * dense_reward_scale
```
================================================
FILE: setup.py
================================================
"""Installation script for the 'timechamber' python package."""
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from setuptools import setup, find_packages
import os
root_dir = os.path.dirname(os.path.realpath(__file__))
# Minimum dependencies required prior to installation
INSTALL_REQUIRES = [
# RL
"gym==0.24",
"torch",
"omegaconf",
"termcolor",
"dill",
"hydra-core>=1.1",
"rl-games==1.5.2",
"pyvirtualdisplay",
"multielo @ git+https://github.com/djcunningham0/multielo.git@440f7922b90ff87009f8283d6491eb0f704e6624",
"matplotlib==3.5.2",
"pytest==7.1.2",
]
# Installation operation
setup(
name="timechamber",
author="ZeldaHuang, Ziyi Liu",
version="0.0.1",
description="A Massively Parallel Large Scale Self-Play Framework",
keywords=["robotics", "rl"],
include_package_data=True,
python_requires=">=3.6.*",
install_requires=INSTALL_REQUIRES,
packages=find_packages("."),
classifiers=["Natural Language :: English", "Programming Language :: Python :: 3.7, 3.8"],
zip_safe=False,
)
# EOF
================================================
FILE: timechamber/__init__.py
================================================
import hydra
from hydra import compose, initialize
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig, OmegaConf
from timechamber.utils.reformat import omegaconf_to_dict
# Custom OmegaConf resolvers used by the Hydra config files under ./cfg.
# 'eq' / 'contains': case-insensitive string equality / containment tests.
OmegaConf.register_new_resolver('eq', lambda x, y: x.lower()==y.lower())
OmegaConf.register_new_resolver('contains', lambda x, y: x.lower() in y.lower())
# 'if': inline conditional expression inside config interpolations.
OmegaConf.register_new_resolver('if', lambda pred, a, b: a if pred else b)
# 'resolve_default': use `default` when the overriding argument is the empty
# string (i.e. the user passed nothing on the command line).
OmegaConf.register_new_resolver('resolve_default', lambda default, arg: default if arg=='' else arg)
def make(
    seed: int,
    task: str,
    num_envs: int,
    sim_device: str,
    rl_device: str,
    graphics_device_id: int = -1,
    device_type: str = "cuda",
    headless: bool = False,
    multi_gpu: bool = False,
    virtual_screen_capture: bool = False,
    force_render: bool = True,
    cfg: DictConfig = None
):
    """Create and return a TimeChamber task environment.

    When ``cfg`` is None, a Hydra config is composed from ``./cfg`` using the
    given ``task`` name (any already-initialized Hydra state is cleared first).
    When ``cfg`` is supplied, its ``task`` section is used as-is.

    Args:
        seed: RNG seed forwarded to the env creator.
        task: Task name (e.g. "MA_Ant_Sumo"); selects the task config group.
        num_envs: Number of parallel environments (only applied when ``cfg``
            is None; a passed-in ``cfg`` is assumed to already carry it).
        sim_device: Device string for the physics simulation (e.g. "cuda:0").
        rl_device: Device string for RL tensors; also written into the task dict.
        graphics_device_id: Graphics device id; -1 presumably disables
            rendering — TODO confirm against vec_task.
        device_type: "cuda" or "cpu" device type for the simulation.
        headless: Run without a viewer window.
        multi_gpu: Enable multi-GPU training mode.
        virtual_screen_capture: Capture frames via a virtual display.
        force_render: Force rendering each step even when headless.
        cfg: Optional pre-composed Hydra/OmegaConf config; when given, the
            function skips Hydra composition entirely.

    Returns:
        The instantiated environment produced by the rl_games env creator.
    """
    from timechamber.utils.rlgames_utils import get_rlgames_env_creator
    # create hydra config if no config passed in
    if cfg is None:
        # reset current hydra config if already parsed (but not passed in here)
        if HydraConfig.initialized():
            # Reuse the task choice from the active Hydra run, then clear the
            # global Hydra state so we can re-initialize below.
            task = HydraConfig.get().runtime.choices['task']
            hydra.core.global_hydra.GlobalHydra.instance().clear()
        with initialize(config_path="./cfg"):
            cfg = compose(config_name="config", overrides=[f"task={task}"])
            task_dict = omegaconf_to_dict(cfg.task)
            task_dict['env']['numEnvs'] = num_envs
    # reuse existing config
    else:
        task_dict = omegaconf_to_dict(cfg.task)
    task_dict['seed'] = cfg.seed
    task_dict['rl_device'] = rl_device
    # NOTE(review): assumes cfg always defines a `motion_file` key (may be
    # empty); used by the ASE humanoid tasks — confirm for non-humanoid cfgs.
    if cfg.motion_file:
        task_dict['env']['motion_file'] = cfg.motion_file
    create_rlgpu_env = get_rlgames_env_creator(
        seed=seed,
        cfg=cfg,
        task_config=task_dict,
        task_name=task_dict["name"],
        sim_device=sim_device,
        rl_device=rl_device,
        graphics_device_id=graphics_device_id,
        headless=headless,
        device_type=device_type,
        multi_gpu=multi_gpu,
        virtual_screen_capture=virtual_screen_capture,
        force_render=force_render,
    )
    return create_rlgpu_env()
================================================
FILE: timechamber/ase/ase_agent.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch
import torch.nn as nn
from isaacgym.torch_utils import *
from rl_games.algos_torch import torch_ext
from rl_games.common import a2c_common
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from timechamber.ase import ase_network_builder
from timechamber.ase.utils import amp_agent
class ASEAgent(amp_agent.AMPAgent):
    """AMP agent extended for ASE (Adversarial Skill Embeddings).

    On top of the AMP discriminator objective, each env carries a latent
    code that conditions the actor and critic. Latents are resampled
    periodically (`_update_latents`) and on env reset. Training adds an
    encoder reward (latent must be recoverable from AMP observations) and
    an optional diversity bonus (different latents -> different actions).
    """

    def __init__(self, base_name, config):
        super().__init__(base_name, config)
        return

    def init_tensors(self):
        """Allocate the per-env latent buffers on top of the AMP tensors."""
        super().init_tensors()

        batch_shape = self.experience_buffer.obs_base_shape
        self.experience_buffer.tensor_dict['ase_latents'] = torch.zeros(batch_shape + (self._latent_dim,),
                                                                        dtype=torch.float32, device=self.ppo_device)
        # Current latent per env; batch_shape[-1] is the env count.
        self._ase_latents = torch.zeros((batch_shape[-1], self._latent_dim), dtype=torch.float32,
                                        device=self.ppo_device)
        self.tensor_list += ['ase_latents']

        # Progress-buffer step at which each env's latent is next resampled.
        self._latent_reset_steps = torch.zeros(batch_shape[-1], dtype=torch.int32, device=self.ppo_device)
        num_envs = self.vec_env.env.task.num_envs
        env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.ppo_device)
        self._reset_latent_step_count(env_ids)

        return

    def play_steps(self):
        """Collect one horizon of experience with latent-conditioned actions.

        After rollout, AMP (discriminator) and encoder rewards are computed
        for the whole minibatch, combined with the task reward, and used for
        GAE. Returns the flattened batch dict expected by the trainer.
        """
        self.set_eval()

        done_indices = []
        update_list = self.update_list

        for n in range(self.horizon_length):
            self.obs = self.env_reset(done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])

            # Resample latents for envs whose latent lifetime expired.
            self._update_latents()

            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, self._ase_latents, masks)
            else:
                res_dict = self.get_action_values(self.obs, self._ase_latents, self._rand_action_probs)

            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])

            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])

            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
            shaped_rewards = self.rewards_shaper(rewards)
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)
            self.experience_buffer.update_data('amp_obs', n, infos['amp_obs'])
            self.experience_buffer.update_data('ase_latents', n, self._ase_latents)
            self.experience_buffer.update_data('rand_action_mask', n, res_dict['rand_action_mask'])

            # Zero the bootstrap value for episodes that terminated early.
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            next_vals = self._eval_critic(self.obs, self._ase_latents)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)

            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            done_indices = all_done_indices[::self.num_agents]

            self.game_rewards.update(self.current_rewards[done_indices])
            self.game_lengths.update(self.current_lengths[done_indices])
            self.algo_observer.process_infos(infos, done_indices)

            not_dones = 1.0 - self.dones.float()
            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones

            if (self.vec_env.env.task.viewer):
                self._amp_debug(infos, self._ase_latents)

            done_indices = done_indices[:, 0]

        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        mb_amp_obs = self.experience_buffer.tensor_dict['amp_obs']
        mb_ase_latents = self.experience_buffer.tensor_dict['ase_latents']

        amp_rewards = self._calc_amp_rewards(mb_amp_obs, mb_ase_latents)
        mb_rewards = self._combine_rewards(mb_rewards, amp_rewards)

        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values

        batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size

        for k, v in amp_rewards.items():
            batch_dict[k] = a2c_common.swap_and_flatten01(v)

        return batch_dict

    def get_action_values(self, obs_dict, ase_latents, rand_action_probs):
        """Run the latent-conditioned policy in eval mode.

        Per env, a Bernoulli draw over `rand_action_probs` decides whether
        the sampled action or the deterministic mean action is used; the
        resulting mask is returned as 'rand_action_mask' so losses can be
        restricted to exploratory (sampled) actions.
        """
        processed_obs = self._preproc_obs(obs_dict['obs'])

        self.model.eval()
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs': processed_obs,
            'rnn_states': self.rnn_states,
            'ase_latents': ase_latents
        }

        with torch.no_grad():
            res_dict = self.model(input_dict)
            if self.has_central_value:
                states = obs_dict['states']
                input_dict = {
                    'is_train': False,
                    'states': states,
                }
                value = self.get_central_value(input_dict)
                res_dict['values'] = value

        if self.normalize_value:
            res_dict['values'] = self.value_mean_std(res_dict['values'], True)

        rand_action_mask = torch.bernoulli(rand_action_probs)
        det_action_mask = rand_action_mask == 0.0
        res_dict['actions'][det_action_mask] = res_dict['mus'][det_action_mask]
        res_dict['rand_action_mask'] = rand_action_mask

        return res_dict

    def prepare_dataset(self, batch_dict):
        """Forward the rollout latents into the PPO dataset."""
        super().prepare_dataset(batch_dict)
        self.dataset.values_dict['ase_latents'] = batch_dict['ase_latents']
        return

    def calc_gradients(self, input_dict):
        """One optimization step over a minibatch.

        Combines the PPO actor/critic losses (masked to exploratory
        actions), the AMP discriminator loss, the encoder loss and the
        optional diversity bonus, then backprops through the GradScaler.

        Fixes vs. the previous revision: the duplicated rnn_masks /
        batch_dict RNN setup was collapsed to a single occurrence, and the
        unused `lr`/`kl` locals were dropped; behavior is unchanged.
        """
        self.set_train()

        value_preds_batch = input_dict['old_values']
        old_action_log_probs_batch = input_dict['old_logp_actions']
        advantage = input_dict['advantages']
        old_mu_batch = input_dict['mu']
        old_sigma_batch = input_dict['sigma']
        return_batch = input_dict['returns']
        actions_batch = input_dict['actions']
        obs_batch = input_dict['obs']
        obs_batch = self._preproc_obs(obs_batch)

        # Only a fixed-size slice of the minibatch feeds the disc/enc heads.
        amp_obs = input_dict['amp_obs'][0:self._amp_minibatch_size]
        amp_obs = self._preproc_amp_obs(amp_obs)
        if (self._enable_enc_grad_penalty()):
            # Gradient penalty needs d(enc_err)/d(amp_obs).
            amp_obs.requires_grad_(True)

        amp_obs_replay = input_dict['amp_obs_replay'][0:self._amp_minibatch_size]
        amp_obs_replay = self._preproc_amp_obs(amp_obs_replay)

        amp_obs_demo = input_dict['amp_obs_demo'][0:self._amp_minibatch_size]
        amp_obs_demo = self._preproc_amp_obs(amp_obs_demo)
        amp_obs_demo.requires_grad_(True)

        ase_latents = input_dict['ase_latents']
        rand_action_mask = input_dict['rand_action_mask']
        rand_action_sum = torch.sum(rand_action_mask)

        lr_mul = 1.0
        curr_e_clip = lr_mul * self.e_clip

        batch_dict = {
            'is_train': True,
            'prev_actions': actions_batch,
            'obs': obs_batch,
            'amp_obs': amp_obs,
            'amp_obs_replay': amp_obs_replay,
            'amp_obs_demo': amp_obs_demo,
            'ase_latents': ase_latents
        }

        rnn_masks = None
        if self.is_rnn:
            rnn_masks = input_dict['rnn_masks']
            batch_dict['rnn_states'] = input_dict['rnn_states']
            batch_dict['seq_length'] = self.seq_len

        with torch.cuda.amp.autocast(enabled=self.mixed_precision):
            res_dict = self.model(batch_dict)
            action_log_probs = res_dict['prev_neglogp']
            values = res_dict['values']
            entropy = res_dict['entropy']
            mu = res_dict['mus']
            sigma = res_dict['sigmas']
            disc_agent_logit = res_dict['disc_agent_logit']
            disc_agent_replay_logit = res_dict['disc_agent_replay_logit']
            disc_demo_logit = res_dict['disc_demo_logit']
            enc_pred = res_dict['enc_pred']

            a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip)
            a_loss = a_info['actor_loss']
            a_clipped = a_info['actor_clipped'].float()

            c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value)
            c_loss = c_info['critic_loss']

            b_loss = self.bound_loss(mu)

            c_loss = torch.mean(c_loss)
            # Actor-side losses only count samples where a random (sampled)
            # action was actually taken.
            a_loss = torch.sum(rand_action_mask * a_loss) / rand_action_sum
            entropy = torch.sum(rand_action_mask * entropy) / rand_action_sum
            b_loss = torch.sum(rand_action_mask * b_loss) / rand_action_sum
            a_clip_frac = torch.sum(rand_action_mask * a_clipped) / rand_action_sum

            disc_agent_cat_logit = torch.cat([disc_agent_logit, disc_agent_replay_logit], dim=0)
            disc_info = self._disc_loss(disc_agent_cat_logit, disc_demo_logit, amp_obs_demo)
            disc_loss = disc_info['disc_loss']

            enc_latents = batch_dict['ase_latents'][0:self._amp_minibatch_size]
            enc_loss_mask = rand_action_mask[0:self._amp_minibatch_size]
            enc_info = self._enc_loss(enc_pred, enc_latents, batch_dict['amp_obs'], enc_loss_mask)
            enc_loss = enc_info['enc_loss']

            loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss \
                + self._disc_coef * disc_loss + self._enc_coef * enc_loss

            if (self._enable_amp_diversity_bonus()):
                diversity_loss = self._diversity_loss(batch_dict['obs'], mu, batch_dict['ase_latents'])
                diversity_loss = torch.sum(rand_action_mask * diversity_loss) / rand_action_sum
                loss += self._amp_diversity_bonus * diversity_loss
                a_info['amp_diversity_loss'] = diversity_loss

            a_info['actor_loss'] = a_loss
            a_info['actor_clip_frac'] = a_clip_frac
            c_info['critic_loss'] = c_loss

            if self.multi_gpu:
                self.optimizer.zero_grad()
            else:
                for param in self.model.parameters():
                    param.grad = None

        self.scaler.scale(loss).backward()
        if self.truncate_grads:
            if self.multi_gpu:
                self.optimizer.synchronize()
                self.scaler.unscale_(self.optimizer)
                nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
                with self.optimizer.skip_synchronize():
                    self.scaler.step(self.optimizer)
                    self.scaler.update()
            else:
                self.scaler.unscale_(self.optimizer)
                nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
                self.scaler.step(self.optimizer)
                self.scaler.update()
        else:
            self.scaler.step(self.optimizer)
            self.scaler.update()

        with torch.no_grad():
            reduce_kl = not self.is_rnn
            kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl)
            if self.is_rnn:
                kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel()  # / sum_mask

        self.train_result = {
            'entropy': entropy,
            'kl': kl_dist,
            'last_lr': self.last_lr,
            'lr_mul': lr_mul,
            'b_loss': b_loss
        }
        self.train_result.update(a_info)
        self.train_result.update(c_info)
        self.train_result.update(disc_info)
        self.train_result.update(enc_info)

        return

    def env_reset(self, env_ids=None):
        """Reset envs and draw fresh latents (all envs when env_ids is None)."""
        obs = super().env_reset(env_ids)

        if (env_ids is None):
            num_envs = self.vec_env.env.task.num_envs
            env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.ppo_device)

        if (len(env_ids) > 0):
            self._reset_latents(env_ids)
            self._reset_latent_step_count(env_ids)

        return obs

    def _reset_latent_step_count(self, env_ids):
        # NOTE(review): latent_steps_min/max default to np.inf in
        # _load_config_params; randint_like assumes finite integer bounds
        # here, so configs are expected to set them — TODO confirm.
        self._latent_reset_steps[env_ids] = torch.randint_like(self._latent_reset_steps[env_ids],
                                                               low=self._latent_steps_min,
                                                               high=self._latent_steps_max)
        return

    def _load_config_params(self, config):
        """Read the ASE-specific hyper-parameters from the train config."""
        super()._load_config_params(config)

        # (The previous revision assigned _latent_dim twice; once is enough.)
        self._latent_dim = config['latent_dim']
        self._latent_steps_min = config.get('latent_steps_min', np.inf)
        self._latent_steps_max = config.get('latent_steps_max', np.inf)

        self._amp_diversity_bonus = config['amp_diversity_bonus']
        self._amp_diversity_tar = config['amp_diversity_tar']

        self._enc_coef = config['enc_coef']
        self._enc_weight_decay = config['enc_weight_decay']
        self._enc_reward_scale = config['enc_reward_scale']
        self._enc_grad_penalty = config['enc_grad_penalty']
        self._enc_reward_w = config['enc_reward_w']

        return

    def _build_net_config(self):
        config = super()._build_net_config()
        config['ase_latent_shape'] = (self._latent_dim,)
        return config

    def _reset_latents(self, env_ids):
        """Sample new latents for the given envs (recolor chars in viewer)."""
        n = len(env_ids)
        z = self._sample_latents(n)
        self._ase_latents[env_ids] = z

        if (self.vec_env.env.task.viewer):
            self._change_char_color(env_ids)

        return

    def _sample_latents(self, n):
        return self.model.a2c_network.sample_latents(n)

    def _update_latents(self):
        """Resample latents for envs whose progress passed their reset step."""
        new_latent_envs = self._latent_reset_steps <= self.vec_env.env.task.progress_buf

        need_update = torch.any(new_latent_envs)
        if (need_update):
            new_latent_env_ids = new_latent_envs.nonzero(as_tuple=False).flatten()
            self._reset_latents(new_latent_env_ids)
            self._latent_reset_steps[new_latent_env_ids] += torch.randint_like(
                self._latent_reset_steps[new_latent_env_ids],
                low=self._latent_steps_min,
                high=self._latent_steps_max)

            if (self.vec_env.env.task.viewer):
                self._change_char_color(new_latent_env_ids)

        return

    def _eval_actor(self, obs, ase_latents):
        return self.model.eval_actor(obs=obs, ase_latents=ase_latents)

    def _eval_critic(self, obs_dict, ase_latents):
        """Value estimate for (obs, latent), with value normalization applied."""
        self.model.eval()
        obs = obs_dict['obs']
        processed_obs = self._preproc_obs(obs)
        value = self.model.eval_critic(processed_obs, ase_latents)

        if self.normalize_value:
            value = self.value_mean_std(value, True)

        return value

    def _calc_amp_rewards(self, amp_obs, ase_latents):
        """Return the discriminator and encoder rewards as a dict."""
        disc_r = self._calc_disc_rewards(amp_obs)
        enc_r = self._calc_enc_rewards(amp_obs, ase_latents)
        return {
            'disc_rewards': disc_r,
            'enc_rewards': enc_r
        }

    def _calc_enc_rewards(self, amp_obs, ase_latents):
        """Encoder reward: negative encoder error, clamped non-negative."""
        with torch.no_grad():
            enc_pred = self._eval_enc(amp_obs)
            err = self._calc_enc_error(enc_pred, ase_latents)
            enc_r = torch.clamp_min(-err, 0.0)
            enc_r *= self._enc_reward_scale

        return enc_r

    def _enc_loss(self, enc_pred, ase_latent, enc_obs, loss_mask):
        """Encoder loss: mean alignment error plus optional weight decay and
        gradient penalty on the encoder input."""
        enc_err = self._calc_enc_error(enc_pred, ase_latent)
        enc_loss = torch.mean(enc_err)

        if (self._enc_weight_decay != 0):
            enc_weights = self.model.a2c_network.get_enc_weights()
            enc_weights = torch.cat(enc_weights, dim=-1)
            enc_weight_decay = torch.sum(torch.square(enc_weights))
            enc_loss += self._enc_weight_decay * enc_weight_decay

        enc_info = {
            'enc_loss': enc_loss
        }

        if (self._enable_enc_grad_penalty()):
            enc_obs_grad = torch.autograd.grad(enc_err, enc_obs, grad_outputs=torch.ones_like(enc_err),
                                               create_graph=True, retain_graph=True, only_inputs=True)
            enc_obs_grad = enc_obs_grad[0]
            enc_obs_grad = torch.sum(torch.square(enc_obs_grad), dim=-1)
            enc_grad_penalty = torch.mean(enc_obs_grad)

            enc_loss += self._enc_grad_penalty * enc_grad_penalty

            enc_info['enc_grad_penalty'] = enc_grad_penalty.detach()

        return enc_info

    def _diversity_loss(self, obs, action_params, ase_latents):
        """Penalize deviation of the action-change/latent-change ratio from
        the diversity target: resample latents, compare the resulting action
        means against the current ones."""
        assert (self.model.a2c_network.is_continuous)

        n = obs.shape[0]
        assert (n == action_params.shape[0])

        new_z = self._sample_latents(n)
        mu, sigma = self._eval_actor(obs=obs, ase_latents=new_z)

        clipped_action_params = torch.clamp(action_params, -1.0, 1.0)
        clipped_mu = torch.clamp(mu, -1.0, 1.0)

        a_diff = clipped_action_params - clipped_mu
        a_diff = torch.mean(torch.square(a_diff), dim=-1)

        # 0.5 * (1 - cos similarity); latents are unit vectors so the dot
        # product is the cosine.
        z_diff = new_z * ase_latents
        z_diff = torch.sum(z_diff, dim=-1)
        z_diff = 0.5 - 0.5 * z_diff

        diversity_bonus = a_diff / (z_diff + 1e-5)
        diversity_loss = torch.square(self._amp_diversity_tar - diversity_bonus)

        return diversity_loss

    def _calc_enc_error(self, enc_pred, ase_latent):
        # Negative dot product: minimized when prediction aligns with latent.
        err = enc_pred * ase_latent
        err = -torch.sum(err, dim=-1, keepdim=True)
        return err

    def _enable_enc_grad_penalty(self):
        return self._enc_grad_penalty != 0

    def _enable_amp_diversity_bonus(self):
        return self._amp_diversity_bonus != 0

    def _eval_enc(self, amp_obs):
        proc_amp_obs = self._preproc_amp_obs(amp_obs)
        return self.model.a2c_network.eval_enc(proc_amp_obs)

    def _combine_rewards(self, task_rewards, amp_rewards):
        """Weighted sum of task, discriminator and encoder rewards."""
        disc_r = amp_rewards['disc_rewards']
        enc_r = amp_rewards['enc_rewards']
        combined_rewards = self._task_reward_w * task_rewards \
            + self._disc_reward_w * disc_r \
            + self._enc_reward_w * enc_r
        return combined_rewards

    def _record_train_batch_info(self, batch_dict, train_info):
        super()._record_train_batch_info(batch_dict, train_info)
        train_info['enc_rewards'] = batch_dict['enc_rewards']
        return

    def _log_train_info(self, train_info, frame):
        """Write the ASE-specific scalars to TensorBoard."""
        super()._log_train_info(train_info, frame)

        self.writer.add_scalar('losses/enc_loss', torch_ext.mean_list(train_info['enc_loss']).item(), frame)

        if (self._enable_amp_diversity_bonus()):
            self.writer.add_scalar('losses/amp_diversity_loss',
                                   torch_ext.mean_list(train_info['amp_diversity_loss']).item(), frame)

        enc_reward_std, enc_reward_mean = torch.std_mean(train_info['enc_rewards'])
        self.writer.add_scalar('info/enc_reward_mean', enc_reward_mean.item(), frame)
        self.writer.add_scalar('info/enc_reward_std', enc_reward_std.item(), frame)

        if (self._enable_enc_grad_penalty()):
            self.writer.add_scalar('info/enc_grad_penalty',
                                   torch_ext.mean_list(train_info['enc_grad_penalty']).item(), frame)

        return

    def _change_char_color(self, env_ids):
        """Give the characters in env_ids a random color of fixed brightness."""
        base_col = np.array([0.4, 0.4, 0.4])
        range_col = np.array([0.0706, 0.149, 0.2863])
        range_sum = np.linalg.norm(range_col)

        rand_col = np.random.uniform(0.0, 1.0, size=3)
        rand_col = range_sum * rand_col / np.linalg.norm(rand_col)
        rand_col += base_col
        self.vec_env.env.task.set_char_color(rand_col, env_ids)
        return

    def _amp_debug(self, info, ase_latents):
        """Print disc/enc diagnostics for env 0 (viewer debugging only)."""
        with torch.no_grad():
            amp_obs = info['amp_obs']
            disc_pred = self._eval_disc(amp_obs)
            amp_rewards = self._calc_amp_rewards(amp_obs, ase_latents)
            disc_reward = amp_rewards['disc_rewards']
            enc_reward = amp_rewards['enc_rewards']

            disc_pred = disc_pred.detach().cpu().numpy()[0, 0]
            disc_reward = disc_reward.cpu().numpy()[0, 0]
            enc_reward = enc_reward.cpu().numpy()[0, 0]
            print("disc_pred: ", disc_pred, disc_reward, enc_reward)

        return
================================================
FILE: timechamber/ase/ase_models.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from timechamber.ase.utils import amp_models
class ModelASEContinuous(amp_models.ModelAMPContinuous):
    """ASE variant of the continuous AMP model.

    Builds the 'ase' network and, at train time, augments the forward
    output with the encoder prediction for the AMP observations.
    """

    def __init__(self, network):
        super().__init__(network)
        return

    def build(self, config):
        """Build the 'ase' a2c network and wrap it in an ASE Network.

        (The leftover debug loop that printed every parameter name on each
        build has been removed; construction behavior is unchanged.)
        """
        net = self.network_builder.build('ase', **config)
        obs_shape = config['input_shape']
        normalize_value = config.get('normalize_value', False)
        normalize_input = config.get('normalize_input', False)
        value_size = config.get('value_size', 1)
        return ModelASEContinuous.Network(net, obs_shape=obs_shape, normalize_value=normalize_value,
                                          normalize_input=normalize_input, value_size=value_size)

    class Network(amp_models.ModelAMPContinuous.Network):
        """Adds 'enc_pred' to the train-time forward result and exposes
        latent-conditioned actor/critic evaluation."""

        def __init__(self, a2c_network, obs_shape, normalize_value, normalize_input, value_size):
            super().__init__(a2c_network,
                             obs_shape=obs_shape,
                             normalize_value=normalize_value,
                             normalize_input=normalize_input,
                             value_size=value_size)
            return

        def forward(self, input_dict):
            """AMP forward pass; at train time also evaluate the encoder on
            the (already preprocessed) AMP observations."""
            is_train = input_dict.get('is_train', True)
            result = super().forward(input_dict)

            if (is_train):
                amp_obs = input_dict['amp_obs']
                enc_pred = self.a2c_network.eval_enc(amp_obs)
                result["enc_pred"] = enc_pred

            return result

        def eval_actor(self, obs, ase_latents, use_hidden_latents=False):
            """Return (mu, sigma) for normalized obs conditioned on latents.

            NOTE(review): use_hidden_latents is accepted for interface
            parity but not forwarded by the underlying eval_actor call.
            """
            processed_obs = self.norm_obs(obs)
            mu, sigma = self.a2c_network.eval_actor(obs=processed_obs, ase_latents=ase_latents)
            return mu, sigma

        def eval_critic(self, obs, ase_latents, use_hidden_latents=False):
            """Return the value estimate for normalized obs and latents."""
            processed_obs = self.norm_obs(obs)
            value = self.a2c_network.eval_critic(processed_obs, ase_latents, use_hidden_latents)
            return value
================================================
FILE: timechamber/ase/ase_network_builder.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import layers
from rl_games.algos_torch import network_builder
import torch
import torch.nn as nn
import numpy as np
import enum
from timechamber.ase.utils import amp_network_builder
ENC_LOGIT_INIT_SCALE = 0.1
class LatentType(enum.Enum):
uniform = 0
sphere = 1
class ASEBuilder(amp_network_builder.AMPBuilder):
    """Network builder for ASE.

    Extends the AMP builder with a latent-conditioned actor/critic
    (style-embedding MLPs below) and an encoder head that maps AMP
    observations back onto the unit hypersphere of latents.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        return

    class Network(amp_network_builder.AMPBuilder.Network):
        def __init__(self, params, **kwargs):
            """Build the full ASE network from builder params and kwargs.

            kwargs supplies actions_num, input_shape, amp_input_shape and
            ase_latent_shape; params (via load) supplies the enc head config
            in addition to the base AMP settings.
            """
            actions_num = kwargs.get('actions_num')
            input_shape = kwargs.get('input_shape')
            self.value_size = kwargs.get('value_size', 1)
            self.num_seqs = num_seqs = kwargs.get('num_seqs', 1)
            amp_input_shape = kwargs.get('amp_input_shape')
            self._ase_latent_shape = kwargs.get('ase_latent_shape')

            # Deliberately skip AMPBuilder.Network.__init__ and initialize
            # the BaseNetwork directly: this class builds its own
            # actor/critic/disc/enc stack below.
            network_builder.NetworkBuilder.BaseNetwork.__init__(self)
            self.load(params)

            actor_out_size, critic_out_size = self._build_actor_critic_net(input_shape, self._ase_latent_shape)

            self.value = torch.nn.Linear(critic_out_size, self.value_size)
            self.value_act = self.activations_factory.create(self.value_activation)

            if self.is_discrete:
                self.logits = torch.nn.Linear(actor_out_size, actions_num)
            '''
            for multidiscrete actions num is a tuple
            '''
            if self.is_multi_discrete:
                self.logits = torch.nn.ModuleList([torch.nn.Linear(actor_out_size, num) for num in actions_num])
            if self.is_continuous:
                self.mu = torch.nn.Linear(actor_out_size, actions_num)
                self.mu_act = self.activations_factory.create(self.space_config['mu_activation'])
                mu_init = self.init_factory.create(**self.space_config['mu_init'])
                self.sigma_act = self.activations_factory.create(self.space_config['sigma_activation'])
                sigma_init = self.init_factory.create(**self.space_config['sigma_init'])

                # sigma is a per-action parameter when fixed/not learned,
                # otherwise a state-dependent linear head.
                if (not self.space_config['learn_sigma']):
                    self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32), requires_grad=False)
                elif self.space_config['fixed_sigma']:
                    self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True)
                else:
                    self.sigma = torch.nn.Linear(actor_out_size, actions_num)

            mlp_init = self.init_factory.create(**self.initializer)
            if self.has_cnn:
                cnn_init = self.init_factory.create(**self.cnn['initializer'])

            # Generic init over every conv/linear module; the specialized
            # heads below are then re-initialized to override this.
            for m in self.modules():
                if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d):
                    cnn_init(m.weight)
                    if getattr(m, "bias", None) is not None:
                        torch.nn.init.zeros_(m.bias)
                if isinstance(m, nn.Linear):
                    mlp_init(m.weight)
                    if getattr(m, "bias", None) is not None:
                        torch.nn.init.zeros_(m.bias)

            # NOTE(review): critic_mlp is a bare nn.Sequential unless
            # self.separate is set (see _build_actor_critic_net), and
            # nn.Sequential has no init_params/get_out_size — the
            # non-separate configuration appears unsupported; confirm that
            # configs always use separate=True.
            self.actor_mlp.init_params()
            self.critic_mlp.init_params()

            if self.is_continuous:
                mu_init(self.mu.weight)
                if self.space_config['fixed_sigma']:
                    sigma_init(self.sigma)
                else:
                    sigma_init(self.sigma.weight)

            self._build_disc(amp_input_shape)
            self._build_enc(amp_input_shape)

            return

        def load(self, params):
            """Read the encoder head config on top of the base AMP params."""
            super().load(params)

            self._enc_units = params['enc']['units']
            self._enc_activation = params['enc']['activation']
            self._enc_initializer = params['enc']['initializer']
            self._enc_separate = params['enc']['separate']

            return

        def forward(self, obs_dict):
            """Evaluate actor and critic; returns actor outputs + (value, states)."""
            obs = obs_dict['obs']
            ase_latents = obs_dict['ase_latents']
            states = obs_dict.get('rnn_states', None)
            use_hidden_latents = obs_dict.get('use_hidden_latents', False)

            actor_outputs = self.eval_actor(obs, ase_latents, use_hidden_latents)
            value = self.eval_critic(obs, ase_latents, use_hidden_latents)

            output = actor_outputs + (value, states)

            return output

        def eval_critic(self, obs, ase_latents, use_hidden_latents=False):
            """Latent-conditioned value estimate."""
            c_out = self.critic_cnn(obs)
            c_out = c_out.contiguous().view(c_out.size(0), -1)

            c_out = self.critic_mlp(c_out, ase_latents, use_hidden_latents)
            value = self.value_act(self.value(c_out))
            return value

        def eval_actor(self, obs, ase_latents, use_hidden_latents=False):
            """Latent-conditioned policy head.

            Returns logits (discrete), a list of logits (multi-discrete), or
            (mu, sigma) for the continuous case; falls through to None when
            no action space flag is set.
            """
            a_out = self.actor_cnn(obs)
            a_out = a_out.contiguous().view(a_out.size(0), -1)
            a_out = self.actor_mlp(a_out, ase_latents, use_hidden_latents)

            if self.is_discrete:
                logits = self.logits(a_out)
                return logits

            if self.is_multi_discrete:
                logits = [logit(a_out) for logit in self.logits]
                return logits

            if self.is_continuous:
                mu = self.mu_act(self.mu(a_out))
                if self.space_config['fixed_sigma']:
                    sigma = mu * 0.0 + self.sigma_act(self.sigma)
                else:
                    sigma = self.sigma_act(self.sigma(a_out))

                return mu, sigma
            return

        def get_enc_weights(self):
            """Flattened weights of the encoder MLP and output layer,
            used for the encoder weight-decay term."""
            weights = []
            for m in self._enc_mlp.modules():
                if isinstance(m, nn.Linear):
                    weights.append(torch.flatten(m.weight))

            weights.append(torch.flatten(self._enc.weight))
            return weights

        def _build_actor_critic_net(self, input_shape, ase_latent_shape):
            """Build the style-conditioned actor MLP and (if separate) the
            critic MLP; returns their output feature sizes."""
            style_units = [512, 256]
            style_dim = ase_latent_shape[-1]

            self.actor_cnn = nn.Sequential()
            self.critic_cnn = nn.Sequential()

            act_fn = self.activations_factory.create(self.activation)
            initializer = self.init_factory.create(**self.initializer)

            self.actor_mlp = AMPStyleCatNet1(obs_size=input_shape[-1],
                                             ase_latent_size=ase_latent_shape[-1],
                                             units=self.units,
                                             activation=act_fn,
                                             style_units=style_units,
                                             style_dim=style_dim,
                                             initializer=initializer)

            if self.separate:
                self.critic_mlp = AMPMLPNet(obs_size=input_shape[-1],
                                            ase_latent_size=ase_latent_shape[-1],
                                            units=self.units,
                                            activation=act_fn,
                                            initializer=initializer)

            actor_out_size = self.actor_mlp.get_out_size()
            critic_out_size = self.critic_mlp.get_out_size()

            return actor_out_size, critic_out_size

        def _build_enc(self, input_shape):
            """Build the encoder head: its own MLP when enc.separate is set,
            otherwise it shares the discriminator's MLP trunk."""
            if (self._enc_separate):
                self._enc_mlp = nn.Sequential()
                mlp_args = {
                    'input_size': input_shape[0],
                    'units': self._enc_units,
                    'activation': self._enc_activation,
                    'dense_func': torch.nn.Linear
                }
                self._enc_mlp = self._build_mlp(**mlp_args)

                mlp_init = self.init_factory.create(**self._enc_initializer)
                for m in self._enc_mlp.modules():
                    if isinstance(m, nn.Linear):
                        mlp_init(m.weight)
                        if getattr(m, "bias", None) is not None:
                            torch.nn.init.zeros_(m.bias)
            else:
                self._enc_mlp = self._disc_mlp

            # Second-to-last module of the trunk is its final Linear layer;
            # its out_features sizes the encoder projection.
            mlp_out_layer = list(self._enc_mlp.modules())[-2]
            mlp_out_size = mlp_out_layer.out_features
            self._enc = torch.nn.Linear(mlp_out_size, self._ase_latent_shape[-1])

            torch.nn.init.uniform_(self._enc.weight, -ENC_LOGIT_INIT_SCALE, ENC_LOGIT_INIT_SCALE)
            torch.nn.init.zeros_(self._enc.bias)

            return

        def eval_enc(self, amp_obs):
            """Encode AMP observations to a unit-norm latent prediction."""
            enc_mlp_out = self._enc_mlp(amp_obs)
            enc_output = self._enc(enc_mlp_out)
            enc_output = torch.nn.functional.normalize(enc_output, dim=-1)
            return enc_output

        def sample_latents(self, n):
            """Sample n latents uniformly on the unit hypersphere
            (standard normal draws, then L2-normalized)."""
            device = next(self._enc.parameters()).device
            z = torch.normal(torch.zeros([n, self._ase_latent_shape[-1]], device=device))
            z = torch.nn.functional.normalize(z, dim=-1)
            return z

    def build(self, name, **kwargs):
        """Factory entry point used by rl_games; name is unused."""
        net = ASEBuilder.Network(self.params, **kwargs)
        return net
class AMPMLPNet(torch.nn.Module):
    """Plain MLP over the concatenation of observation and ASE latent.

    Used as the (separate) critic trunk; the latent is consumed raw, with
    no style embedding.
    """

    def __init__(self, obs_size, ase_latent_size, units, activation, initializer):
        super().__init__()

        concat_size = obs_size + ase_latent_size
        print('build amp mlp net:', concat_size)

        self._units = units
        self._initializer = initializer

        # Alternate Linear / activation pairs, one per entry in units.
        stack = []
        prev_size = concat_size
        for hidden_size in units:
            stack.append(torch.nn.Linear(prev_size, hidden_size))
            stack.append(activation)
            prev_size = hidden_size
        self._mlp = nn.Sequential(*stack)

        self.init_params()
        return

    def forward(self, obs, latent, skip_style):
        # skip_style is accepted for interface parity with the style net;
        # this trunk always consumes the raw latent.
        joined = torch.cat([obs, latent], dim=-1)
        return self._mlp(joined)

    def init_params(self):
        """Apply the configured initializer to weights and zero all biases."""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                self._initializer(module.weight)
                if getattr(module, "bias", None) is not None:
                    torch.nn.init.zeros_(module.bias)
        return

    def get_out_size(self):
        return self._units[-1]
class AMPStyleCatNet1(torch.nn.Module):
    """Actor trunk that routes the ASE latent through a small 'style' head
    (MLP -> linear -> tanh), then concatenates the style embedding with the
    observation before a stack of dense layers.
    """

    def __init__(self, obs_size, ase_latent_size, units, activation,
                 style_units, style_dim, initializer):
        super().__init__()

        print('build amp style cat net:', obs_size, ase_latent_size)

        self._activation = activation
        self._initializer = initializer
        self._units = units
        self._style_dim = style_dim
        self._style_activation = torch.tanh

        self._style_mlp = self._build_style_mlp(style_units, ase_latent_size)
        self._style_dense = torch.nn.Linear(style_units[-1], style_dim)

        # Main trunk: activations are applied in forward(), so only the
        # Linear layers are stored here.
        trunk = []
        prev_size = obs_size + style_dim
        for hidden_size in units:
            trunk.append(torch.nn.Linear(prev_size, hidden_size))
            prev_size = hidden_size
        self._dense_layers = nn.ModuleList(trunk)

        self.init_params()
        return

    def forward(self, obs, latent, skip_style):
        # skip_style=True means the caller already provides a style
        # embedding in place of the raw latent.
        style = latent if skip_style else self.eval_style(latent)

        h = torch.cat([obs, style], dim=-1)
        for dense in self._dense_layers:
            h = self._activation(dense(h))
        return h

    def eval_style(self, latent):
        """Map a latent to its tanh-squashed style embedding."""
        return self._style_activation(self._style_dense(self._style_mlp(latent)))

    def init_params(self):
        """Configured init for all Linear weights (zero biases), then a
        uniform[-1, 1] re-init of the style projection weight."""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                self._initializer(module.weight)
                if getattr(module, "bias", None) is not None:
                    torch.nn.init.zeros_(module.bias)

        nn.init.uniform_(self._style_dense.weight, -1.0, 1.0)
        return

    def get_out_size(self):
        return self._units[-1]

    def _build_style_mlp(self, style_units, input_size):
        """Linear/activation stack mapping the latent to style features."""
        stack = []
        prev_size = input_size
        for hidden_size in style_units:
            stack.append(torch.nn.Linear(prev_size, hidden_size))
            stack.append(self._activation)
            prev_size = hidden_size
        return nn.Sequential(*stack)
================================================
FILE: timechamber/ase/ase_players.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from pytest import param
import torch
from isaacgym.torch_utils import *
from rl_games.algos_torch import players
from timechamber.ase.utils import amp_players
import timechamber.ase.ase_network_builder as ase_network_builder
class ASEPlayer(amp_players.AMPPlayerContinuous):
    """Inference player for an ASE (Adversarial Skill Embedding) policy.

    Extends the AMP continuous player with a per-environment latent "skill"
    vector that conditions the policy at every step.  Latents are resampled
    when an environment resets and after a randomized number of steps, and
    an encoder-based reward is computed for debug printouts.
    """

    def __init__(self, params):
        config = params['config']
        # Dimension of the latent (skill) space.
        self._latent_dim = config['latent_dim']
        # A sampled latent is held for a random number of steps drawn from
        # [latent_steps_min, latent_steps_max).  NOTE(review): np.random.randint
        # cannot accept np.inf, so the config is expected to provide finite
        # values whenever latents should actually cycle -- confirm vs. cfg files.
        self._latent_steps_min = config.get('latent_steps_min', np.inf)
        self._latent_steps_max = config.get('latent_steps_max', np.inf)
        # Scale factor applied to the encoder reward in _calc_enc_rewards().
        self._enc_reward_scale = config['enc_reward_scale']
        super().__init__(params)
        # One latent vector per environment.
        if (hasattr(self, 'env')) and self.env is not None:
            batch_size = self.env.task.num_envs
        else:
            batch_size = self.env_info['num_envs']
        self._ase_latents = torch.zeros((batch_size, self._latent_dim), dtype=torch.float32,
                                        device=self.device)
        return

    def run(self):
        # Seed the resample countdown before the base-class rollout loop starts.
        self._reset_latent_step_count()
        super().run()
        return

    def get_action(self, obs_dict, is_determenistic=False):
        """Select an action conditioned on the current per-env ASE latents.

        Mirrors the base player's get_action but injects 'ase_latents' into the
        model input dict.  (The 'is_determenistic' spelling follows rl_games.)
        """
        self._update_latents()
        obs = obs_dict['obs']
        # Add a batch dimension when a single unbatched observation is passed.
        if len(obs.size()) == len(self.obs_shape):
            obs = obs.unsqueeze(0)
        obs = self._preproc_obs(obs)
        ase_latents = self._ase_latents
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs' : obs,
            'rnn_states' : self.states,
            'ase_latents': ase_latents
        }
        with torch.no_grad():
            res_dict = self.model(input_dict)
        mu = res_dict['mus']
        action = res_dict['actions']
        self.states = res_dict['rnn_states']
        # Deterministic evaluation uses the mean action; otherwise a sample.
        if is_determenistic:
            current_action = mu
        else:
            current_action = action
        current_action = torch.squeeze(current_action.detach())
        return players.rescale_actions(self.actions_low, self.actions_high, torch.clamp(current_action, -1.0, 1.0))

    def env_reset(self, env_ids=None):
        # Resample latents for the environments that were just reset.
        obs = super().env_reset(env_ids)
        self._reset_latents(env_ids)
        return obs

    def _build_net_config(self):
        # Tell the network builder the shape of the latent input.
        config = super()._build_net_config()
        config['ase_latent_shape'] = (self._latent_dim,)
        return config

    def _reset_latents(self, done_env_ids=None):
        """Draw fresh latents for the given env ids (all envs when None)."""
        if (done_env_ids is None):
            num_envs = self.env.task.num_envs
            done_env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.device)
        rand_vals = self.model.a2c_network.sample_latents(len(done_env_ids))
        self._ase_latents[done_env_ids] = rand_vals
        # Recolor characters so a latent switch is visible in the viewer.
        self._change_char_color(done_env_ids)
        return

    def _update_latents(self):
        # Resample every env's latent once the countdown expires;
        # otherwise just tick the countdown down.
        if (self._latent_step_count <= 0):
            self._reset_latents()
            self._reset_latent_step_count()
            if (self.env.task.viewer):
                print("Sampling new amp latents------------------------------")
                num_envs = self.env.task.num_envs
                env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.device)
                self._change_char_color(env_ids)
        else:
            self._latent_step_count -= 1
        return

    def _reset_latent_step_count(self):
        # Random hold duration in [latent_steps_min, latent_steps_max).
        self._latent_step_count = np.random.randint(self._latent_steps_min, self._latent_steps_max)
        return

    def _calc_amp_rewards(self, amp_obs, ase_latents):
        """Return the discriminator and encoder rewards for amp_obs."""
        disc_r = self._calc_disc_rewards(amp_obs)
        enc_r = self._calc_enc_rewards(amp_obs, ase_latents)
        output = {
            'disc_rewards': disc_r,
            'enc_rewards': enc_r
        }
        return output

    def _calc_enc_rewards(self, amp_obs, ase_latents):
        # Reward is the clamped, scaled negative encoder error: large when the
        # encoder prediction aligns with the latent that produced the motion.
        with torch.no_grad():
            enc_pred = self._eval_enc(amp_obs)
            err = self._calc_enc_error(enc_pred, ase_latents)
            enc_r = torch.clamp_min(-err, 0.0)
            enc_r *= self._enc_reward_scale
        return enc_r

    def _calc_enc_error(self, enc_pred, ase_latent):
        # Negative dot product between encoder prediction and latent.
        err = enc_pred * ase_latent
        err = -torch.sum(err, dim=-1, keepdim=True)
        return err

    def _eval_enc(self, amp_obs):
        # Normalize AMP observations before feeding the encoder head.
        proc_amp_obs = self._preproc_amp_obs(amp_obs)
        return self.model.a2c_network.eval_enc(proc_amp_obs)

    def _amp_debug(self, info):
        # Print discriminator prediction and rewards for the first env.
        with torch.no_grad():
            amp_obs = info['amp_obs']
            amp_obs = amp_obs  # no-op; kept as in the original
            ase_latents = self._ase_latents
            disc_pred = self._eval_disc(amp_obs)
            amp_rewards = self._calc_amp_rewards(amp_obs, ase_latents)
            disc_reward = amp_rewards['disc_rewards']
            enc_reward = amp_rewards['enc_rewards']
            disc_pred = disc_pred.detach().cpu().numpy()[0, 0]
            disc_reward = disc_reward.cpu().numpy()[0, 0]
            enc_reward = enc_reward.cpu().numpy()[0, 0]
            print("disc_pred: ", disc_pred, disc_reward, enc_reward)
        return

    def _change_char_color(self, env_ids):
        # Random color around a gray base with a fixed chroma radius, so each
        # latent switch gets a visually distinct (but bounded) tint.
        base_col = np.array([0.4, 0.4, 0.4])
        range_col = np.array([0.0706, 0.149, 0.2863])
        range_sum = np.linalg.norm(range_col)
        rand_col = np.random.uniform(0.0, 1.0, size=3)
        rand_col = range_sum * rand_col / np.linalg.norm(rand_col)
        rand_col += base_col
        self.env.task.set_char_color(rand_col, env_ids)
        return
================================================
FILE: timechamber/ase/hrl_agent.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import copy
from datetime import datetime
from distutils.command.config import config
from gym import spaces
import numpy as np
import os
import time
import yaml
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import central_value
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common import a2c_common
from rl_games.common import datasets
from rl_games.common import schedulers
from rl_games.common import vecenv
import torch
from torch import optim
import timechamber.ase.utils.common_agent as common_agent
import timechamber.ase.ase_agent as ase_agent
import timechamber.ase.ase_models as ase_models
import timechamber.ase.ase_network_builder as ase_network_builder
from tensorboardX import SummaryWriter
class HRLAgent(common_agent.CommonAgent):
    """High-level PPO agent that drives a pretrained low-level ASE controller.

    The high-level action space is the LLC's latent space: each agent step
    normalizes the action into a latent z, rolls the LLC policy for
    `llc_steps` simulator steps, and averages task and discriminator rewards
    over that window.
    """

    def __init__(self, base_name, params):
        config = params['config']
        # Read the LLC training config to recover its latent dimension.
        with open(os.path.join(os.getcwd(), config['llc_config']), 'r') as f:
            llc_config = yaml.load(f, Loader=yaml.SafeLoader)
            llc_config_params = llc_config['params']
            self._latent_dim = llc_config_params['config']['latent_dim']
        super().__init__(base_name, params)
        # The LLC observes the full observation minus this task-specific tail.
        self._task_size = self.vec_env.env.task.get_task_obs_size()
        self._llc_steps = config['llc_steps']
        llc_checkpoint = config['llc_checkpoint']
        assert(llc_checkpoint != "")
        self._build_llc(llc_config_params, llc_checkpoint)
        return

    def env_step(self, actions):
        """Roll the LLC for `llc_steps` env steps under one high-level action.

        Returns (obs, rewards, dones, infos) where task and discriminator
        rewards are averaged over the window, and done/terminate flags are
        OR-ed over the window.  The averaged discriminator rewards are
        attached to infos['disc_rewards'].
        """
        actions = self.preprocess_actions(actions)
        obs = self.obs['obs']
        rewards = 0.0
        disc_rewards = 0.0
        done_count = 0.0
        terminate_count = 0.0
        for t in range(self._llc_steps):
            llc_actions = self._compute_llc_action(obs, actions)
            obs_dict, curr_rewards, curr_dones, infos = self.vec_env.step(llc_actions)
            # TODO
            obs = obs_dict['obs']
            rewards += curr_rewards
            done_count += curr_dones
            terminate_count += infos['terminate']
            amp_obs = infos['amp_obs']
            curr_disc_reward = self._calc_disc_reward(amp_obs)
            disc_rewards += curr_disc_reward
        rewards /= self._llc_steps
        disc_rewards /= self._llc_steps
        # An env counts as done/terminated if it was so in any sub-step.
        dones = torch.zeros_like(done_count)
        dones[done_count > 0] = 1.0
        terminate = torch.zeros_like(terminate_count)
        terminate[terminate_count > 0] = 1.0
        infos['terminate'] = terminate
        infos['disc_rewards'] = disc_rewards
        if self.is_tensor_obses:
            if self.value_size == 1:
                rewards = rewards.unsqueeze(1)
            return self.obs_to_tensors(obs), rewards.to(self.ppo_device), dones.to(self.ppo_device), infos
        else:
            if self.value_size == 1:
                rewards = np.expand_dims(rewards, axis=1)
            return self.obs_to_tensors(obs), torch.from_numpy(rewards).to(self.ppo_device).float(), torch.from_numpy(dones).to(self.ppo_device), infos

    def cast_obs(self, obs):
        # Keep the LLC agent's tensor/numpy mode in sync with ours.
        obs = super().cast_obs(obs)
        self._llc_agent.is_tensor_obses = self.is_tensor_obses
        return obs

    def preprocess_actions(self, actions):
        # Clamp high-level actions to [-1, 1] before they become latents.
        clamped_actions = torch.clamp(actions, -1.0, 1.0)
        if not self.is_tensor_obses:
            clamped_actions = clamped_actions.cpu().numpy()
        return clamped_actions

    def play_steps(self):
        """Collect one horizon of experience, including discriminator rewards.

        Standard PPO rollout loop extended with a 'disc_rewards' buffer slot;
        task and discriminator rewards are combined before computing advantages.
        """
        self.set_eval()
        epinfos = []
        done_indices = torch.tensor([], device=self.device, dtype=torch.long)
        update_list = self.update_list
        for n in range(self.horizon_length):
            # Reset only the envs that finished on the previous step.
            self.obs = self.env_reset(done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])
            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                res_dict = self.get_action_values(self.obs)
            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])
            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])
            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
            shaped_rewards = self.rewards_shaper(rewards)
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)
            self.experience_buffer.update_data('disc_rewards', n, infos['disc_rewards'])
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            # Bootstrap value is zeroed for truly terminated envs.
            next_vals = self._eval_critic(self.obs)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)
            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            done_indices = all_done_indices[::self.num_agents]
            self.game_rewards.update(self.current_rewards[done_indices])
            self.game_lengths.update(self.current_lengths[done_indices])
            self.algo_observer.process_infos(infos, done_indices)
            not_dones = 1.0 - self.dones.float()
            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones
            done_indices = done_indices[:, 0]
        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        mb_disc_rewards = self.experience_buffer.tensor_dict['disc_rewards']
        mb_rewards = self._combine_rewards(mb_rewards, mb_disc_rewards)
        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values
        batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size
        return batch_dict

    def _load_config_params(self, config):
        # Weights for mixing task and discriminator rewards.
        super()._load_config_params(config)
        self._task_reward_w = config['task_reward_w']
        self._disc_reward_w = config['disc_reward_w']
        return

    def _get_mean_rewards(self):
        # Rescale from per-high-level-step back to per-env-step totals.
        rewards = super()._get_mean_rewards()
        rewards *= self._llc_steps
        return rewards

    def _setup_action_space(self):
        # The high-level agent acts in the LLC's latent space.
        super()._setup_action_space()
        self.actions_num = self._latent_dim
        return

    def init_tensors(self):
        # Replace the action/mu/sigma buffers with latent-dim sized ones and
        # add a buffer slot for per-step discriminator rewards.
        super().init_tensors()
        del self.experience_buffer.tensor_dict['actions']
        del self.experience_buffer.tensor_dict['mus']
        del self.experience_buffer.tensor_dict['sigmas']
        batch_shape = self.experience_buffer.obs_base_shape
        self.experience_buffer.tensor_dict['actions'] = torch.zeros(batch_shape + (self._latent_dim,),
                                                                    dtype=torch.float32, device=self.ppo_device)
        self.experience_buffer.tensor_dict['mus'] = torch.zeros(batch_shape + (self._latent_dim,),
                                                                dtype=torch.float32, device=self.ppo_device)
        self.experience_buffer.tensor_dict['sigmas'] = torch.zeros(batch_shape + (self._latent_dim,),
                                                                   dtype=torch.float32, device=self.ppo_device)
        self.experience_buffer.tensor_dict['disc_rewards'] = torch.zeros_like(self.experience_buffer.tensor_dict['rewards'])
        self.tensor_list += ['disc_rewards']
        return

    def _build_llc(self, config_params, checkpoint_file):
        """Instantiate the low-level ASE agent and restore its checkpoint."""
        llc_agent_config = self._build_llc_agent_config(config_params)
        self._llc_agent = ase_agent.ASEAgent('llc', llc_agent_config)
        self._llc_agent.restore(checkpoint_file)
        print("Loaded LLC checkpoint from {:s}".format(checkpoint_file))
        self._llc_agent.set_eval()
        return

    def _build_llc_agent_config(self, config_params, network=None):
        """Derive an agent config for the LLC with the task obs tail stripped.

        NOTE(review): mutates config_params in place (params aliases it).
        """
        llc_env_info = copy.deepcopy(self.env_info)
        obs_space = llc_env_info['observation_space']
        obs_size = obs_space.shape[0]
        obs_size -= self._task_size
        llc_env_info['observation_space'] = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size])
        params = config_params
        params['config']['network'] = network
        params['config']['num_actors'] = self.num_actors
        params['config']['features'] = {'observer' : self.algo_observer}
        params['config']['env_info'] = llc_env_info
        params['config']['device'] = self.device
        return params

    def _compute_llc_action(self, obs, actions):
        """Map a high-level action to a low-level action via the LLC policy."""
        llc_obs = self._extract_llc_obs(obs)
        processed_obs = self._llc_agent._preproc_obs(llc_obs)
        # Project the high-level action onto the unit sphere to form a latent.
        z = torch.nn.functional.normalize(actions, dim=-1)
        mu, _ = self._llc_agent.model.eval_actor(obs=processed_obs, ase_latents=z)
        llc_action = mu
        llc_action = self._llc_agent.preprocess_actions(llc_action)
        return llc_action

    def _extract_llc_obs(self, obs):
        # Strip the task-specific tail; the LLC only sees the base observation.
        obs_size = obs.shape[-1]
        llc_obs = obs[..., :obs_size - self._task_size]
        return llc_obs

    def _calc_disc_reward(self, amp_obs):
        # Delegate discriminator reward computation to the LLC agent.
        disc_reward = self._llc_agent._calc_disc_rewards(amp_obs)
        return disc_reward

    def _combine_rewards(self, task_rewards, disc_rewards):
        # NOTE(review): the leading '+' on the continuation line is a harmless
        # unary plus; the expression is a plain weighted sum.
        combined_rewards = self._task_reward_w * task_rewards + \
            + self._disc_reward_w * disc_rewards
        #combined_rewards = task_rewards * disc_rewards
        return combined_rewards

    def _record_train_batch_info(self, batch_dict, train_info):
        super()._record_train_batch_info(batch_dict, train_info)
        train_info['disc_rewards'] = batch_dict['disc_rewards']
        return

    def _log_train_info(self, train_info, frame):
        # Log discriminator-reward statistics alongside the base metrics.
        super()._log_train_info(train_info, frame)
        disc_reward_std, disc_reward_mean = torch.std_mean(train_info['disc_rewards'])
        self.writer.add_scalar('info/disc_reward_mean', disc_reward_mean.item(), frame)
        self.writer.add_scalar('info/disc_reward_std', disc_reward_std.item(), frame)
        return
================================================
FILE: timechamber/ase/hrl_models.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch.nn as nn
from rl_games.algos_torch.models import ModelA2CContinuousLogStd
class ModelHRLContinuous(ModelA2CContinuousLogStd):
    """Continuous A2C model wrapper for the high-level (HRL) policy.

    Thin specialization of rl_games' ModelA2CContinuousLogStd whose Network
    exposes a value-only evaluation path (eval_critic) used during rollouts.
    """

    def __init__(self, network):
        super().__init__(network)
        return

    def build(self, config):
        """Build the underlying a2c network and wrap it in our Network class.

        config keys read here: 'input_shape' (required), and optional
        'normalize_value', 'normalize_input', 'value_size'.
        """
        # Fix: removed leftover debug code that printed every parameter name
        # of the freshly built network on each call.
        net = self.network_builder.build('amp', **config)
        obs_shape = config['input_shape']
        normalize_value = config.get('normalize_value', False)
        normalize_input = config.get('normalize_input', False)
        value_size = config.get('value_size', 1)
        return ModelHRLContinuous.Network(net, obs_shape=obs_shape, normalize_value=normalize_value,
                                          normalize_input=normalize_input, value_size=value_size)

    class Network(ModelA2CContinuousLogStd.Network):
        def __init__(self, a2c_network, obs_shape, normalize_value, normalize_input, value_size):
            super().__init__(a2c_network,
                             obs_shape=obs_shape,
                             normalize_value=normalize_value,
                             normalize_input=normalize_input,
                             value_size=value_size)
            return

        def eval_critic(self, obs):
            """Return (unnormalized) value estimates for normalized obs."""
            processed_obs = self.norm_obs(obs)
            value = self.a2c_network.eval_critic(processed_obs)
            values = self.unnorm_value(value)
            return values
================================================
FILE: timechamber/ase/hrl_network_builder.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from rl_games.algos_torch import network_builder
import torch
import torch.nn as nn
from timechamber.ase import ase_network_builder
class HRLBuilder(network_builder.A2CBuilder):
    """A2C network builder for the high-level policy.

    Differs from the stock A2C builder in two ways: a fixed (non-learned)
    sigma parameter when the config disables sigma learning, and a tanh
    squash applied to the policy mean in forward().
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        return

    class Network(network_builder.A2CBuilder.Network):
        def __init__(self, params, **kwargs):
            super().__init__(params, **kwargs)
            # When sigma is not learned, register it as a frozen parameter
            # and fill it from the configured initializer.
            fixed_sigma = self.is_continuous and not self.space_config['learn_sigma']
            if fixed_sigma:
                num_actions = kwargs.get('actions_num')
                init_sigma = self.init_factory.create(**self.space_config['sigma_init'])
                self.sigma = nn.Parameter(
                    torch.zeros(num_actions, requires_grad=False, dtype=torch.float32),
                    requires_grad=False)
                init_sigma(self.sigma)
            return

        def forward(self, obs_dict):
            # Squash the mean into (-1, 1); sigma/value/states pass through.
            mu, sigma, value, states = super().forward(obs_dict)
            return torch.tanh(mu), sigma, value, states

        def eval_critic(self, obs):
            """Run only the critic path (cnn -> flatten -> mlp -> value head)."""
            features = self.critic_cnn(obs)
            features = features.contiguous().view(features.size(0), -1)
            features = self.critic_mlp(features)
            return self.value_act(self.value(features))

    def build(self, name, **kwargs):
        return HRLBuilder.Network(self.params, **kwargs)
================================================
FILE: timechamber/ase/hrl_players.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import copy
from gym import spaces
import numpy as np
import os
import torch
import yaml
import time
from rl_games.algos_torch import players
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common.player import BasePlayer
import timechamber.ase.utils.common_player as common_player
import timechamber.ase.ase_models as ase_models
import timechamber.ase.ase_network_builder as ase_network_builder
import timechamber.ase.ase_players as ase_players
class HRLPlayer(common_player.CommonPlayer):
    """Inference player for the hierarchical (HRL) policy.

    The high-level model outputs an (unnormalized) ASE latent; env_step()
    normalizes it and rolls the pretrained low-level controller (LLC) for
    `llc_steps` simulator steps per high-level action, averaging rewards
    over that window.
    """

    def __init__(self, params):
        config = params['config']
        # Read the LLC training config to recover its latent dimension.
        with open(os.path.join(os.getcwd(), config['llc_config']), 'r') as f:
            llc_config = yaml.load(f, Loader=yaml.SafeLoader)
            llc_config_params = llc_config['params']
            self._latent_dim = llc_config_params['config']['latent_dim']
        super().__init__(params)
        # The LLC observes the full observation minus this task-specific tail.
        self._task_size = self.env.task.get_task_obs_size()
        self._llc_steps = config['llc_steps']
        llc_checkpoint = config['llc_checkpoint']
        assert(llc_checkpoint != "")
        self._build_llc(llc_config_params, llc_checkpoint)
        return

    def get_action(self, obs_dict, is_determenistic=False):
        """Return the clamped high-level action (later normalized to a latent).

        (The 'is_determenistic' spelling follows the rl_games API.)
        """
        obs = obs_dict['obs']
        # Add a batch dimension when a single unbatched observation is passed.
        if len(obs.size()) == len(self.obs_shape):
            obs = obs.unsqueeze(0)
        proc_obs = self._preproc_obs(obs)
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs': proc_obs,
            'rnn_states': self.states
        }
        with torch.no_grad():
            res_dict = self.model(input_dict)
        mu = res_dict['mus']
        action = res_dict['actions']
        self.states = res_dict['rnn_states']
        # Deterministic evaluation uses the mean action; otherwise a sample.
        current_action = mu if is_determenistic else action
        current_action = torch.squeeze(current_action.detach())
        return torch.clamp(current_action, -1.0, 1.0)

    def run(self):
        """Evaluation loop: play n_games episodes and print reward statistics."""
        n_games = self.games_num
        render = self.render_env
        n_game_life = self.n_game_life
        is_determenistic = self.is_determenistic
        sum_rewards = 0
        sum_steps = 0
        sum_game_res = 0
        n_games = n_games * n_game_life
        games_played = 0
        has_masks = False
        has_masks_func = getattr(self.env, "has_action_mask", None) is not None
        if has_masks_func:
            has_masks = self.env.has_action_mask()
        need_init_rnn = self.is_rnn
        for _ in range(n_games):
            if games_played >= n_games:
                break
            obs_dict = self.env_reset()
            batch_size = 1
            if len(obs_dict['obs'].size()) > len(self.obs_shape):
                batch_size = obs_dict['obs'].size()[0]
            self.batch_size = batch_size
            if need_init_rnn:
                self.init_rnn()
                need_init_rnn = False
            cr = torch.zeros(batch_size, dtype=torch.float32)
            steps = torch.zeros(batch_size, dtype=torch.float32)
            print_game_res = False
            done_indices = []
            for n in range(self.max_steps):
                # Reset only the envs that finished on the previous step.
                obs_dict = self.env_reset(done_indices)
                if has_masks:
                    masks = self.env.get_action_mask()
                    action = self.get_masked_action(obs_dict, masks, is_determenistic)
                else:
                    action = self.get_action(obs_dict, is_determenistic)
                obs_dict, r, done, info = self.env_step(self.env, obs_dict, action)
                cr += r
                steps += 1
                self._post_step(info)
                if render:
                    self.env.render(mode = 'human')
                    time.sleep(self.render_sleep)
                all_done_indices = done.nonzero(as_tuple=False)
                done_indices = all_done_indices[::self.num_agents]
                done_count = len(done_indices)
                games_played += done_count
                if done_count > 0:
                    if self.is_rnn:
                        # Zero the RNN state of finished envs.
                        for s in self.states:
                            s[:,all_done_indices,:] = s[:,all_done_indices,:] * 0.0
                    cur_rewards = cr[done_indices].sum().item()
                    cur_steps = steps[done_indices].sum().item()
                    cr = cr * (1.0 - done.float())
                    steps = steps * (1.0 - done.float())
                    sum_rewards += cur_rewards
                    sum_steps += cur_steps
                    game_res = 0.0
                    if isinstance(info, dict):
                        if 'battle_won' in info:
                            print_game_res = True
                            game_res = info.get('battle_won', 0.5)
                        if 'scores' in info:
                            print_game_res = True
                            game_res = info.get('scores', 0.5)
                    if self.print_stats:
                        if print_game_res:
                            print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count, 'w:', game_res)
                        else:
                            print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count)
                    sum_game_res += game_res
                    if batch_size//self.num_agents == 1 or games_played >= n_games:
                        break
                done_indices = done_indices[:, 0]
        print(sum_rewards)
        if print_game_res:
            print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life, 'winrate:', sum_game_res / games_played * n_game_life)
        else:
            print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life)
        return

    def env_step(self, env, obs_dict, action):
        """Roll the LLC for `llc_steps` env steps under one high-level action.

        Rewards are averaged over the window; dones are OR-ed.
        """
        if not self.is_tensor_obses:
            # Bug fix: the original referenced an undefined name `actions`
            # here (the parameter is `action`), raising NameError whenever
            # observations are not tensors.
            action = action.cpu().numpy()
        obs = obs_dict['obs']
        rewards = 0.0
        done_count = 0.0
        disc_rewards = 0.0
        for t in range(self._llc_steps):
            llc_actions = self._compute_llc_action(obs, action)
            obs, curr_rewards, curr_dones, infos = env.step(llc_actions)
            rewards += curr_rewards
            done_count += curr_dones
            amp_obs = infos['amp_obs']
            # Scalar discriminator reward of the first env, for logging.
            curr_disc_reward = self._calc_disc_reward(amp_obs)
            curr_disc_reward = curr_disc_reward[0, 0].cpu().numpy()
            disc_rewards += curr_disc_reward
        rewards /= self._llc_steps
        # An env counts as done if it was done in any sub-step.
        dones = torch.zeros_like(done_count)
        dones[done_count > 0] = 1.0
        disc_rewards /= self._llc_steps
        if isinstance(obs, dict):
            obs = obs['obs']
        if obs.dtype == np.float64:
            obs = np.float32(obs)
        if self.value_size > 1:
            rewards = rewards[0]
        if self.is_tensor_obses:
            return obs, rewards.cpu(), dones.cpu(), infos
        else:
            if np.isscalar(dones):
                rewards = np.expand_dims(np.asarray(rewards), 0)
                dones = np.expand_dims(np.asarray(dones), 0)
            return torch.from_numpy(obs).to(self.device), torch.from_numpy(rewards), torch.from_numpy(dones), infos

    def _build_llc(self, config_params, checkpoint_file):
        """Instantiate the low-level ASE player and restore its checkpoint."""
        llc_agent_config = self._build_llc_agent_config(config_params)
        self._llc_agent = ase_players.ASEPlayer(llc_agent_config)
        self._llc_agent.restore(checkpoint_file)
        print("Loaded LLC checkpoint from {:s}".format(checkpoint_file))
        return

    def _build_llc_agent_config(self, config_params, network=None):
        """Derive a player config for the LLC with the task obs tail stripped.

        NOTE: mutates config_params in place (params aliases it).
        """
        llc_env_info = copy.deepcopy(self.env_info)
        obs_space = llc_env_info['observation_space']
        obs_size = obs_space.shape[0]
        obs_size -= self._task_size
        llc_env_info['observation_space'] = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size])
        llc_env_info['amp_observation_space'] = self.env.amp_observation_space.shape
        llc_env_info['num_envs'] = self.env.task.num_envs
        params = config_params
        params['config']['network'] = network
        params['config']['env_info'] = llc_env_info
        return params

    def _setup_action_space(self):
        # The high-level player acts in the LLC's latent space.
        super()._setup_action_space()
        self.actions_num = self._latent_dim
        return

    def _compute_llc_action(self, obs, actions):
        """Map a high-level action to a low-level action via the LLC policy."""
        llc_obs = self._extract_llc_obs(obs)
        processed_obs = self._llc_agent._preproc_obs(llc_obs)
        # Project the high-level action onto the unit sphere to form a latent.
        z = torch.nn.functional.normalize(actions, dim=-1)
        mu, _ = self._llc_agent.model.eval_actor(obs=processed_obs, ase_latents=z)
        llc_action = players.rescale_actions(self.actions_low, self.actions_high, torch.clamp(mu, -1.0, 1.0))
        return llc_action

    def _extract_llc_obs(self, obs):
        # Strip the task-specific tail; the LLC only sees the base observation.
        obs_size = obs.shape[-1]
        llc_obs = obs[..., :obs_size - self._task_size]
        return llc_obs

    def _calc_disc_reward(self, amp_obs):
        # Delegate discriminator reward computation to the LLC player.
        disc_reward = self._llc_agent._calc_disc_rewards(amp_obs)
        return disc_reward
================================================
FILE: timechamber/ase/utils/amp_agent.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.algos_torch import torch_ext
from rl_games.common import a2c_common
from rl_games.common import schedulers
from rl_games.common import vecenv
from isaacgym.torch_utils import *
import time
from datetime import datetime
import numpy as np
from torch import optim
import torch
from torch import nn
import timechamber.ase.utils.replay_buffer as replay_buffer
import timechamber.ase.utils.common_agent as common_agent
from tensorboardX import SummaryWriter
class AMPAgent(common_agent.CommonAgent):
def __init__(self, base_name, params):
    """Build the base agent, then attach the AMP input normalizer if enabled."""
    super().__init__(base_name, params)
    if not self._normalize_amp_input:
        return
    amp_shape = self._amp_observation_space.shape
    self._amp_input_mean_std = RunningMeanStd(amp_shape).to(self.ppo_device)
def init_tensors(self):
    """Allocate the base rollout tensors, then the AMP-specific buffers."""
    super().init_tensors()
    self._build_amp_buffers()
def set_eval(self):
    """Switch to eval mode, freezing the AMP input normalizer statistics."""
    super().set_eval()
    if not self._normalize_amp_input:
        return
    self._amp_input_mean_std.eval()
def set_train(self):
    """Switch to train mode, letting the AMP input normalizer update again."""
    super().set_train()
    if not self._normalize_amp_input:
        return
    self._amp_input_mean_std.train()
def get_stats_weights(self):
state = super().get_stats_weights()
if self._normalize_amp_input:
state['amp_input_mean_std'] = self._amp_input_mean_std.state_dict()
return state
def set_stats_weights(self, weights):
super().set_stats_weights(weights)
if self._normalize_amp_input:
self._amp_input_mean_std.load_state_dict(weights['amp_input_mean_std'])
return
def play_steps(self):
self.set_eval()
epinfos = []
done_indices = []
update_list = self.update_list
for n in range(self.horizon_length):
self.obs = self.env_reset(done_indices)
self.experience_buffer.update_data('obses', n, self.obs['obs'])
if self.use_action_masks:
masks = self.vec_env.get_action_masks()
res_dict = self.get_masked_action_values(self.obs, masks)
else:
res_dict = self.get_action_values(self.obs, self._rand_action_probs)
for k in update_list:
self.experience_buffer.update_data(k, n, res_dict[k])
if self.has_central_value:
self.experience_buffer.update_data('states', n, self.obs['states'])
self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
shaped_rewards = self.rewards_shaper(rewards)
self.experience_buffer.update_data('rewards', n, shaped_rewards)
self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
self.experience_buffer.update_data('dones', n, self.dones)
self.experience_buffer.update_data('amp_obs', n, infos['amp_obs'])
self.experience_buffer.update_data('rand_action_mask', n, res_dict['rand_action_mask'])
terminated = infos['terminate'].float()
terminated = terminated.unsqueeze(-1)
next_vals = self._eval_critic(self.obs)
next_vals *= (1.0 - terminated)
self.experience_buffer.update_data('next_values', n, next_vals)
self.current_rewards += rewards
self.current_lengths += 1
all_done_indices = self.dones.nonzero(as_tuple=False)
done_indices = all_done_indices[::self.num_agents]
self.game_rewards.update(self.current_rewards[done_indices])
self.game_lengths.update(self.current_lengths[done_indices])
self.algo_observer.process_infos(infos, done_indices)
not_dones = 1.0 - self.dones.float()
self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
self.current_lengths = self.current_lengths * not_dones
if (self.vec_env.env.task.viewer):
self._amp_debug(infos)
done_indices = done_indices[:, 0]
mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
mb_values = self.experience_buffer.tensor_dict['values']
mb_next_values = self.experience_buffer.tensor_dict['next_values']
mb_rewards = self.experience_buffer.tensor_dict['rewards']
mb_amp_obs = self.experience_buffer.tensor_dict['amp_obs']
amp_rewards = self._calc_amp_rewards(mb_amp_obs)
mb_rewards = self._combine_rewards(mb_rewards, amp_rewards)
mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
mb_returns = mb_advs + mb_values
batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list)
batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
batch_dict['played_frames'] = self.batch_size
for k, v in amp_rewards.items():
batch_dict[k] = a2c_common.swap_and_flatten01(v)
return batch_dict
def get_action_values(self, obs_dict, rand_action_probs):
processed_obs = self._preproc_obs(obs_dict['obs'])
self.model.eval()
input_dict = {
'is_train': False,
'prev_actions': None,
'obs' : processed_obs,
'rnn_states' : self.rnn_states
}
with torch.no_grad():
res_dict = self.model(input_dict)
if self.has_central_value:
states = obs_dict['states']
input_dict = {
'is_train': False,
'states' : states,
}
value = self.get_central_value(input_dict)
res_dict['values'] = value
if self.normalize_value:
res_dict['values'] = self.value_mean_std(res_dict['values'], True)
rand_action_mask = torch.bernoulli(rand_action_probs)
det_action_mask = rand_action_mask == 0.0
res_dict['actions'][det_action_mask] = res_dict['mus'][det_action_mask]
res_dict['rand_action_mask'] = rand_action_mask
return res_dict
def prepare_dataset(self, batch_dict):
super().prepare_dataset(batch_dict)
self.dataset.values_dict['amp_obs'] = batch_dict['amp_obs']
self.dataset.values_dict['amp_obs_demo'] = batch_dict['amp_obs_demo']
self.dataset.values_dict['amp_obs_replay'] = batch_dict['amp_obs_replay']
rand_action_mask = batch_dict['rand_action_mask']
self.dataset.values_dict['rand_action_mask'] = rand_action_mask
return
def train_epoch(self):
play_time_start = time.time()
with torch.no_grad():
if self.is_rnn:
batch_dict = self.play_steps_rnn()
else:
batch_dict = self.play_steps()
play_time_end = time.time()
update_time_start = time.time()
rnn_masks = batch_dict.get('rnn_masks', None)
self._update_amp_demos()
num_obs_samples = batch_dict['amp_obs'].shape[0]
amp_obs_demo = self._amp_obs_demo_buffer.sample(num_obs_samples)['amp_obs']
batch_dict['amp_obs_demo'] = amp_obs_demo
if (self._amp_replay_buffer.get_total_count() == 0):
batch_dict['amp_obs_replay'] = batch_dict['amp_obs']
else:
batch_dict['amp_obs_replay'] = self._amp_replay_buffer.sample(num_obs_samples)['amp_obs']
self.set_train()
self.curr_frames = batch_dict.pop('played_frames')
self.prepare_dataset(batch_dict)
self.algo_observer.after_steps()
if self.has_central_value:
self.train_central_value()
train_info = None
if self.is_rnn:
frames_mask_ratio = rnn_masks.sum().item() / (rnn_masks.nelement())
print(frames_mask_ratio)
for _ in range(0, self.mini_epochs_num):
ep_kls = []
for i in range(len(self.dataset)):
curr_train_info = self.train_actor_critic(self.dataset[i])
if self.schedule_type == 'legacy':
if self.multi_gpu:
curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls')
self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, curr_train_info['kl'].item())
self.update_lr(self.last_lr)
if (train_info is None):
train_info = dict()
for k, v in curr_train_info.items():
train_info[k] = [v]
else:
for k, v in curr_train_info.items():
train_info[k].append(v)
av_kls = torch_ext.mean_list(train_info['kl'])
if self.schedule_type == 'standard':
if self.multi_gpu:
av_kls = self.hvd.average_value(av_kls, 'ep_kls')
self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item())
self.update_lr(self.last_lr)
if self.schedule_type == 'standard_epoch':
if self.multi_gpu:
av_kls = self.hvd.average_value(torch_ext.mean_list(kls), 'ep_kls')
self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item())
self.update_lr(self.last_lr)
update_time_end = time.time()
play_time = play_time_end - play_time_start
update_time = update_time_end - update_time_start
total_time = update_time_end - play_time_start
self._store_replay_amp_obs(batch_dict['amp_obs'])
train_info['play_time'] = play_time
train_info['update_time'] = update_time
train_info['total_time'] = total_time
self._record_train_batch_info(batch_dict, train_info)
return train_info
def calc_gradients(self, input_dict):
self.set_train()
value_preds_batch = input_dict['old_values']
old_action_log_probs_batch = input_dict['old_logp_actions']
advantage = input_dict['advantages']
old_mu_batch = input_dict['mu']
old_sigma_batch = input_dict['sigma']
return_batch = input_dict['returns']
actions_batch = input_dict['actions']
obs_batch = input_dict['obs']
obs_batch = self._preproc_obs(obs_batch)
amp_obs = input_dict['amp_obs'][0:self._amp_minibatch_size]
amp_obs = self._preproc_amp_obs(amp_obs)
amp_obs_replay = input_dict['amp_obs_replay'][0:self._amp_minibatch_size]
amp_obs_replay = self._preproc_amp_obs(amp_obs_replay)
amp_obs_demo = input_dict['amp_obs_demo'][0:self._amp_minibatch_size]
amp_obs_demo = self._preproc_amp_obs(amp_obs_demo)
amp_obs_demo.requires_grad_(True)
rand_action_mask = input_dict['rand_action_mask']
rand_action_sum = torch.sum(rand_action_mask)
lr = self.last_lr
kl = 1.0
lr_mul = 1.0
curr_e_clip = lr_mul * self.e_clip
batch_dict = {
'is_train': True,
'prev_actions': actions_batch,
'obs' : obs_batch,
'amp_obs' : amp_obs,
'amp_obs_replay' : amp_obs_replay,
'amp_obs_demo' : amp_obs_demo
}
rnn_masks = None
if self.is_rnn:
rnn_masks = input_dict['rnn_masks']
batch_dict['rnn_states'] = input_dict['rnn_states']
batch_dict['seq_length'] = self.seq_len
with torch.cuda.amp.autocast(enabled=self.mixed_precision):
res_dict = self.model(batch_dict)
action_log_probs = res_dict['prev_neglogp']
values = res_dict['values']
entropy = res_dict['entropy']
mu = res_dict['mus']
sigma = res_dict['sigmas']
disc_agent_logit = res_dict['disc_agent_logit']
disc_agent_replay_logit = res_dict['disc_agent_replay_logit']
disc_demo_logit = res_dict['disc_demo_logit']
a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip)
a_loss = a_info['actor_loss']
a_clipped = a_info['actor_clipped'].float()
c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value)
c_loss = c_info['critic_loss']
b_loss = self.bound_loss(mu)
c_loss = torch.mean(c_loss)
a_loss = torch.sum(rand_action_mask * a_loss) / rand_action_sum
entropy = torch.sum(rand_action_mask * entropy) / rand_action_sum
b_loss = torch.sum(rand_action_mask * b_loss) / rand_action_sum
a_clip_frac = torch.sum(rand_action_mask * a_clipped) / rand_action_sum
disc_agent_cat_logit = torch.cat([disc_agent_logit, disc_agent_replay_logit], dim=0)
disc_info = self._disc_loss(disc_agent_cat_logit, disc_demo_logit, amp_obs_demo)
disc_loss = disc_info['disc_loss']
loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss \
+ self._disc_coef * disc_loss
a_info['actor_loss'] = a_loss
a_info['actor_clip_frac'] = a_clip_frac
c_info['critic_loss'] = c_loss
if self.multi_gpu:
self.optimizer.zero_grad()
else:
for param in self.model.parameters():
param.grad = None
self.scaler.scale(loss).backward()
#TODO: Refactor this ugliest code of the year
if self.truncate_grads:
if self.multi_gpu:
self.optimizer.synchronize()
self.scaler.unscale_(self.optimizer)
nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
with self.optimizer.skip_synchronize():
self.scaler.step(self.optimizer)
self.scaler.update()
else:
self.scaler.unscale_(self.optimizer)
nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
self.scaler.step(self.optimizer)
self.scaler.update()
else:
self.scaler.step(self.optimizer)
self.scaler.update()
with torch.no_grad():
reduce_kl = not self.is_rnn
kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl)
if self.is_rnn:
kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel() #/ sum_mask
self.train_result = {
'entropy': entropy,
'kl': kl_dist,
'last_lr': self.last_lr,
'lr_mul': lr_mul,
'b_loss': b_loss
}
self.train_result.update(a_info)
self.train_result.update(c_info)
self.train_result.update(disc_info)
return
def _load_config_params(self, config):
super()._load_config_params(config)
# when eps greedy is enabled, rollouts will be generated using a mixture of
# a deterministic and stochastic actions. The deterministic actions help to
# produce smoother, less noisy, motions that can be used to train a better
# discriminator. If the discriminator is only trained with jittery motions
# from noisy actions, it can learn to phone in on the jitteriness to
# differential between real and fake samples.
self._enable_eps_greedy = bool(config['enable_eps_greedy'])
self._task_reward_w = config['task_reward_w']
self._disc_reward_w = config['disc_reward_w']
self._amp_observation_space = self.env_info['amp_observation_space']
self._amp_batch_size = int(config['amp_batch_size'])
self._amp_minibatch_size = int(config['amp_minibatch_size'])
assert(self._amp_minibatch_size <= self.minibatch_size)
self._disc_coef = config['disc_coef']
self._disc_logit_reg = config['disc_logit_reg']
self._disc_grad_penalty = config['disc_grad_penalty']
self._disc_weight_decay = config['disc_weight_decay']
self._disc_reward_scale = config['disc_reward_scale']
self._normalize_amp_input = config.get('normalize_amp_input', True)
return
def _build_net_config(self):
config = super()._build_net_config()
config['amp_input_shape'] = self._amp_observation_space.shape
return config
def _build_rand_action_probs(self):
num_envs = self.vec_env.env.task.num_envs
env_ids = to_torch(np.arange(num_envs), dtype=torch.float32, device=self.ppo_device)
self._rand_action_probs = 1.0 - torch.exp(10 * (env_ids / (num_envs - 1.0) - 1.0))
self._rand_action_probs[0] = 1.0
self._rand_action_probs[-1] = 0.0
if not self._enable_eps_greedy:
self._rand_action_probs[:] = 1.0
return
def _init_train(self):
super()._init_train()
self._init_amp_demo_buf()
return
def _disc_loss(self, disc_agent_logit, disc_demo_logit, obs_demo):
# prediction loss
disc_loss_agent = self._disc_loss_neg(disc_agent_logit)
disc_loss_demo = self._disc_loss_pos(disc_demo_logit)
disc_loss = 0.5 * (disc_loss_agent + disc_loss_demo)
# logit reg
logit_weights = self.model.a2c_network.get_disc_logit_weights()
disc_logit_loss = torch.sum(torch.square(logit_weights))
disc_loss += self._disc_logit_reg * disc_logit_loss
# grad penalty
disc_demo_grad = torch.autograd.grad(disc_demo_logit, obs_demo, grad_outputs=torch.ones_like(disc_demo_logit),
create_graph=True, retain_graph=True, only_inputs=True)
disc_demo_grad = disc_demo_grad[0]
disc_demo_grad = torch.sum(torch.square(disc_demo_grad), dim=-1)
disc_grad_penalty = torch.mean(disc_demo_grad)
disc_loss += self._disc_grad_penalty * disc_grad_penalty
# weight decay
if (self._disc_weight_decay != 0):
disc_weights = self.model.a2c_network.get_disc_weights()
disc_weights = torch.cat(disc_weights, dim=-1)
disc_weight_decay = torch.sum(torch.square(disc_weights))
disc_loss += self._disc_weight_decay * disc_weight_decay
disc_agent_acc, disc_demo_acc = self._compute_disc_acc(disc_agent_logit, disc_demo_logit)
disc_info = {
'disc_loss': disc_loss,
'disc_grad_penalty': disc_grad_penalty.detach(),
'disc_logit_loss': disc_logit_loss.detach(),
'disc_agent_acc': disc_agent_acc.detach(),
'disc_demo_acc': disc_demo_acc.detach(),
'disc_agent_logit': disc_agent_logit.detach(),
'disc_demo_logit': disc_demo_logit.detach()
}
return disc_info
def _disc_loss_neg(self, disc_logits):
bce = torch.nn.BCEWithLogitsLoss()
loss = bce(disc_logits, torch.zeros_like(disc_logits))
return loss
def _disc_loss_pos(self, disc_logits):
bce = torch.nn.BCEWithLogitsLoss()
loss = bce(disc_logits, torch.ones_like(disc_logits))
return loss
def _compute_disc_acc(self, disc_agent_logit, disc_demo_logit):
agent_acc = disc_agent_logit < 0
agent_acc = torch.mean(agent_acc.float())
demo_acc = disc_demo_logit > 0
demo_acc = torch.mean(demo_acc.float())
return agent_acc, demo_acc
def _fetch_amp_obs_demo(self, num_samples):
amp_obs_demo = self.vec_env.env.fetch_amp_obs_demo(num_samples)
return amp_obs_demo
def _build_amp_buffers(self):
batch_shape = self.experience_buffer.obs_base_shape
self.experience_buffer.tensor_dict['amp_obs'] = torch.zeros(batch_shape + self._amp_observation_space.shape,
device=self.ppo_device)
self.experience_buffer.tensor_dict['rand_action_mask'] = torch.zeros(batch_shape, dtype=torch.float32, device=self.ppo_device)
amp_obs_demo_buffer_size = int(self.config['amp_obs_demo_buffer_size'])
self._amp_obs_demo_buffer = replay_buffer.ReplayBuffer(amp_obs_demo_buffer_size, self.ppo_device)
self._amp_replay_keep_prob = self.config['amp_replay_keep_prob']
replay_buffer_size = int(self.config['amp_replay_buffer_size'])
self._amp_replay_buffer = replay_buffer.ReplayBuffer(replay_buffer_size, self.ppo_device)
self._build_rand_action_probs()
self.tensor_list += ['amp_obs', 'rand_action_mask']
return
def _init_amp_demo_buf(self):
buffer_size = self._amp_obs_demo_buffer.get_buffer_size()
num_batches = int(np.ceil(buffer_size / self._amp_batch_size))
for i in range(num_batches):
curr_samples = self._fetch_amp_obs_demo(self._amp_batch_size)
self._amp_obs_demo_buffer.store({'amp_obs': curr_samples})
return
def _update_amp_demos(self):
new_amp_obs_demo = self._fetch_amp_obs_demo(self._amp_batch_size)
self._amp_obs_demo_buffer.store({'amp_obs': new_amp_obs_demo})
return
def _preproc_amp_obs(self, amp_obs):
if self._normalize_amp_input:
amp_obs = self._amp_input_mean_std(amp_obs)
return amp_obs
def _combine_rewards(self, task_rewards, amp_rewards):
disc_r = amp_rewards['disc_rewards']
combined_rewards = self._task_reward_w * task_rewards + \
+ self._disc_reward_w * disc_r
return combined_rewards
def _eval_disc(self, amp_obs):
proc_amp_obs = self._preproc_amp_obs(amp_obs)
return self.model.a2c_network.eval_disc(proc_amp_obs)
def _calc_advs(self, batch_dict):
returns = batch_dict['returns']
values = batch_dict['values']
rand_action_mask = batch_dict['rand_action_mask']
advantages = returns - values
advantages = torch.sum(advantages, axis=1)
if self.normalize_advantage:
advantages = torch_ext.normalization_with_masks(advantages, rand_action_mask)
return advantages
def _calc_amp_rewards(self, amp_obs):
disc_r = self._calc_disc_rewards(amp_obs)
output = {
'disc_rewards': disc_r
}
return output
def _calc_disc_rewards(self, amp_obs):
with torch.no_grad():
disc_logits = self._eval_disc(amp_obs)
prob = 1 / (1 + torch.exp(-disc_logits))
disc_r = -torch.log(torch.maximum(1 - prob, torch.tensor(0.0001, device=self.ppo_device)))
disc_r *= self._disc_reward_scale
return disc_r
def _store_replay_amp_obs(self, amp_obs):
buf_size = self._amp_replay_buffer.get_buffer_size()
buf_total_count = self._amp_replay_buffer.get_total_count()
if (buf_total_count > buf_size):
keep_probs = to_torch(np.array([self._amp_replay_keep_prob] * amp_obs.shape[0]), device=self.ppo_device)
keep_mask = torch.bernoulli(keep_probs) == 1.0
amp_obs = amp_obs[keep_mask]
if (amp_obs.shape[0] > buf_size):
rand_idx = torch.randperm(amp_obs.shape[0])
rand_idx = rand_idx[:buf_size]
amp_obs = amp_obs[rand_idx]
self._amp_replay_buffer.store({'amp_obs': amp_obs})
return
def _record_train_batch_info(self, batch_dict, train_info):
super()._record_train_batch_info(batch_dict, train_info)
train_info['disc_rewards'] = batch_dict['disc_rewards']
return
def _log_train_info(self, train_info, frame):
super()._log_train_info(train_info, frame)
self.writer.add_scalar('losses/disc_loss', torch_ext.mean_list(train_info['disc_loss']).item(), frame)
self.writer.add_scalar('info/disc_agent_acc', torch_ext.mean_list(train_info['disc_agent_acc']).item(), frame)
self.writer.add_scalar('info/disc_demo_acc', torch_ext.mean_list(train_info['disc_demo_acc']).item(), frame)
self.writer.add_scalar('info/disc_agent_logit', torch_ext.mean_list(train_info['disc_agent_logit']).item(), frame)
self.writer.add_scalar('info/disc_demo_logit', torch_ext.mean_list(train_info['disc_demo_logit']).item(), frame)
self.writer.add_scalar('info/disc_grad_penalty', torch_ext.mean_list(train_info['disc_grad_penalty']).item(), frame)
self.writer.add_scalar('info/disc_logit_loss', torch_ext.mean_list(train_info['disc_logit_loss']).item(), frame)
disc_reward_std, disc_reward_mean = torch.std_mean(train_info['disc_rewards'])
self.writer.add_scalar('info/disc_reward_mean', disc_reward_mean.item(), frame)
self.writer.add_scalar('info/disc_reward_std', disc_reward_std.item(), frame)
return
def _amp_debug(self, info):
with torch.no_grad():
amp_obs = info['amp_obs']
amp_obs = amp_obs[0:1]
disc_pred = self._eval_disc(amp_obs)
amp_rewards = self._calc_amp_rewards(amp_obs)
disc_reward = amp_rewards['disc_rewards']
disc_pred = disc_pred.detach().cpu().numpy()[0, 0]
disc_reward = disc_reward.cpu().numpy()[0, 0]
print("disc_pred: ", disc_pred, disc_reward)
return
================================================
FILE: timechamber/ase/utils/amp_datasets.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch
from rl_games.common import datasets
class AMPDataset(datasets.PPODataset):
    """PPO dataset variant that samples minibatches via a shuffled index buffer.

    Minibatch rows are drawn through a random permutation of the full batch,
    which is reshuffled once per complete pass over the data.
    """

    def __init__(self, batch_size, minibatch_size, is_discrete, is_rnn, device, seq_len):
        super().__init__(batch_size, minibatch_size, is_discrete, is_rnn, device, seq_len)
        # Random permutation of sample indices; reshuffled after a full pass.
        self._idx_buf = torch.randperm(batch_size)

    def update_mu_sigma(self, mu, sigma):
        # Mu/sigma updates are not supported for AMP datasets.
        raise NotImplementedError()

    def _get_item(self, idx):
        """Return minibatch ``idx`` using the current index permutation."""
        lo = idx * self.minibatch_size
        hi = lo + self.minibatch_size
        chosen = self._idx_buf[lo:hi]

        batch = {
            key: val[chosen]
            for key, val in self.values_dict.items()
            if key not in self.special_names and val is not None
        }

        # Reshuffle once the whole batch has been consumed.
        if hi >= self.batch_size:
            self._shuffle_idx_buf()

        return batch

    def _shuffle_idx_buf(self):
        # In-place assignment keeps the same underlying tensor object.
        self._idx_buf[:] = torch.randperm(self.batch_size)
================================================
FILE: timechamber/ase/utils/amp_models.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch.nn as nn
from rl_games.algos_torch.models import ModelA2CContinuousLogStd
class ModelAMPContinuous(ModelA2CContinuousLogStd):
    """Continuous-action A2C model whose network also exposes an AMP discriminator."""

    def __init__(self, network):
        super().__init__(network)
        return

    def build(self, config):
        """Instantiate the 'amp' network and wrap it in this model's Network.

        Cleanup: removed a leftover debug loop that printed every parameter
        name (and a commented-out config print) on each model build.
        """
        net = self.network_builder.build('amp', **config)
        obs_shape = config['input_shape']
        normalize_value = config.get('normalize_value', False)
        normalize_input = config.get('normalize_input', False)
        value_size = config.get('value_size', 1)
        return ModelAMPContinuous.Network(net, obs_shape=obs_shape, normalize_value=normalize_value,
                                          normalize_input=normalize_input, value_size=value_size)

    class Network(ModelA2CContinuousLogStd.Network):
        def __init__(self, a2c_network, obs_shape, normalize_value, normalize_input, value_size):
            super().__init__(a2c_network, obs_shape=obs_shape,
                             normalize_value=normalize_value,
                             normalize_input=normalize_input,
                             value_size=value_size)
            return

        def forward(self, input_dict):
            """Standard A2C forward pass; during training additionally evaluates
            the discriminator on agent, replay and demo observations and adds
            the resulting logits to the output dict.
            """
            is_train = input_dict.get('is_train', True)
            result = super().forward(input_dict)

            if (is_train):
                amp_obs = input_dict['amp_obs']
                disc_agent_logit = self.a2c_network.eval_disc(amp_obs)
                result["disc_agent_logit"] = disc_agent_logit

                amp_obs_replay = input_dict['amp_obs_replay']
                disc_agent_replay_logit = self.a2c_network.eval_disc(amp_obs_replay)
                result["disc_agent_replay_logit"] = disc_agent_replay_logit

                amp_demo_obs = input_dict['amp_obs_demo']
                disc_demo_logit = self.a2c_network.eval_disc(amp_demo_obs)
                result["disc_demo_logit"] = disc_demo_logit

            return result

        def eval_actor(self, obs):
            """Normalize obs and return (mu, sigma) from the actor head."""
            processed_obs = self.norm_obs(obs)
            mu, sigma = self.a2c_network.eval_actor(obs=processed_obs)
            return mu, sigma

        def eval_critic(self, obs):
            """Normalize obs and return the critic value."""
            processed_obs = self.norm_obs(obs)
            value = self.a2c_network.eval_critic(processed_obs)
            return value
================================================
FILE: timechamber/ase/utils/amp_network_builder.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import layers
from rl_games.algos_torch import network_builder
import torch
import torch.nn as nn
import numpy as np
DISC_LOGIT_INIT_SCALE = 1.0
class AMPBuilder(network_builder.A2CBuilder):
    """A2C network builder extended with an AMP discriminator head."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        return

    class Network(network_builder.A2CBuilder.Network):
        def __init__(self, params, **kwargs):
            """Build the actor/critic networks plus the discriminator MLP."""
            super().__init__(params, **kwargs)

            if self.is_continuous:
                if (not self.space_config['learn_sigma']):
                    # Fixed (non-learnable) sigma: stored as a frozen parameter.
                    actions_num = kwargs.get('actions_num')
                    sigma_init = self.init_factory.create(**self.space_config['sigma_init'])
                    self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32),
                                              requires_grad=False)
                    sigma_init(self.sigma)

            amp_input_shape = kwargs.get('amp_input_shape')
            self._build_disc(amp_input_shape)
            return

        def load(self, params):
            """Read discriminator hyper-parameters from the network config."""
            super().load(params)

            self._disc_units = params['disc']['units']
            self._disc_activation = params['disc']['activation']
            self._disc_initializer = params['disc']['initializer']
            return

        def forward(self, obs_dict):
            """Evaluate actor and critic; returns (actor outputs..., value, rnn states)."""
            obs = obs_dict['obs']
            states = obs_dict.get('rnn_states', None)

            actor_outputs = self.eval_actor(obs)
            value = self.eval_critic(obs)

            output = actor_outputs + (value, states)
            return output

        def eval_actor(self, obs):
            """Run the actor head; returns logits (discrete/multi-discrete) or
            (mu, sigma) (continuous). Returns None for an unknown action space.
            """
            a_out = self.actor_cnn(obs)
            a_out = a_out.contiguous().view(a_out.size(0), -1)
            a_out = self.actor_mlp(a_out)

            if self.is_discrete:
                logits = self.logits(a_out)
                return logits
            if self.is_multi_discrete:
                logits = [logit(a_out) for logit in self.logits]
                return logits
            if self.is_continuous:
                mu = self.mu_act(self.mu(a_out))
                if self.space_config['fixed_sigma']:
                    sigma = mu * 0.0 + self.sigma_act(self.sigma)
                else:
                    sigma = self.sigma_act(self.sigma(a_out))
                return mu, sigma
            return

        def eval_critic(self, obs):
            """Run the critic head; returns the state value."""
            c_out = self.critic_cnn(obs)
            c_out = c_out.contiguous().view(c_out.size(0), -1)
            c_out = self.critic_mlp(c_out)
            value = self.value_act(self.value(c_out))
            return value

        def eval_disc(self, amp_obs):
            """Return the raw discriminator logit for each AMP observation."""
            disc_mlp_out = self._disc_mlp(amp_obs)
            disc_logits = self._disc_logits(disc_mlp_out)
            return disc_logits

        def get_disc_logit_weights(self):
            """Flattened weights of the final discriminator layer (logit regularization)."""
            return torch.flatten(self._disc_logits.weight)

        def get_disc_weights(self):
            """Flattened weights of every discriminator linear layer (weight decay)."""
            weights = []
            for m in self._disc_mlp.modules():
                if isinstance(m, nn.Linear):
                    weights.append(torch.flatten(m.weight))

            weights.append(torch.flatten(self._disc_logits.weight))
            return weights

        def _build_disc(self, input_shape):
            """Construct the discriminator MLP and its scalar logit output layer.

            Cleanup: removed a placeholder ``nn.Sequential()`` assignment that
            was immediately overwritten by the built MLP.
            """
            mlp_args = {
                'input_size': input_shape[0],
                'units': self._disc_units,
                'activation': self._disc_activation,
                'dense_func': torch.nn.Linear
            }
            self._disc_mlp = self._build_mlp(**mlp_args)

            mlp_out_size = self._disc_units[-1]
            self._disc_logits = torch.nn.Linear(mlp_out_size, 1)

            mlp_init = self.init_factory.create(**self._disc_initializer)
            for m in self._disc_mlp.modules():
                if isinstance(m, nn.Linear):
                    mlp_init(m.weight)
                    if getattr(m, "bias", None) is not None:
                        torch.nn.init.zeros_(m.bias)

            torch.nn.init.uniform_(self._disc_logits.weight, -DISC_LOGIT_INIT_SCALE, DISC_LOGIT_INIT_SCALE)
            torch.nn.init.zeros_(self._disc_logits.bias)
            return

    def build(self, name, **kwargs):
        """Factory entry point used by rl_games to instantiate the network."""
        net = AMPBuilder.Network(self.params, **kwargs)
        return net
================================================
FILE: timechamber/ase/utils/amp_players.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch.running_mean_std import RunningMeanStd
import timechamber.ase.utils.common_player as common_player
class AMPPlayerContinuous(common_player.CommonPlayer):
    """Inference-time player for AMP policies.

    Extends CommonPlayer with the AMP discriminator: it normalizes AMP
    observations, evaluates the discriminator, and converts its logits into
    the style reward used during training (printed for debugging when a
    viewer is attached).
    """

    def __init__(self, params):
        cfg = params['config']
        self._normalize_amp_input = cfg.get('normalize_amp_input', True)
        self._disc_reward_scale = cfg['disc_reward_scale']
        super().__init__(params)
        return

    def restore(self, fn):
        """Load a checkpoint; 'Base' means "no checkpoint" and is skipped."""
        if fn != 'Base':
            super().restore(fn)
            if self._normalize_amp_input:
                ckpt = torch_ext.load_checkpoint(fn)
                self._amp_input_mean_std.load_state_dict(ckpt['amp_input_mean_std'])
        return

    def _build_net(self, config):
        super()._build_net(config)
        if self._normalize_amp_input:
            # Running statistics for AMP observations, frozen in eval mode.
            self._amp_input_mean_std = RunningMeanStd(config['amp_input_shape']).to(self.device)
            self._amp_input_mean_std.eval()
        return

    def _post_step(self, info):
        super()._post_step(info)
        # Only print discriminator diagnostics when a viewer is attached.
        if self.env.task.viewer:
            self._amp_debug(info)
        return

    def _build_net_config(self):
        config = super()._build_net_config()
        if hasattr(self, 'env') and self.env is not None:
            config['amp_input_shape'] = self.env.amp_observation_space.shape
        else:
            config['amp_input_shape'] = self.env_info['amp_observation_space']
        return config

    def _amp_debug(self, info):
        """Print the discriminator prediction/reward for the first env."""
        with torch.no_grad():
            first_obs = info['amp_obs'][0:1]
            pred = self._eval_disc(first_obs)
            reward = self._calc_amp_rewards(first_obs)['disc_rewards']
            print("disc_pred: ",
                  pred.detach().cpu().numpy()[0, 0],
                  reward.cpu().numpy()[0, 0])
        return

    def _preproc_amp_obs(self, amp_obs):
        if self._normalize_amp_input:
            amp_obs = self._amp_input_mean_std(amp_obs)
        return amp_obs

    def _eval_disc(self, amp_obs):
        return self.model.a2c_network.eval_disc(self._preproc_amp_obs(amp_obs))

    def _calc_amp_rewards(self, amp_obs):
        return {'disc_rewards': self._calc_disc_rewards(amp_obs)}

    def _calc_disc_rewards(self, amp_obs):
        """Map discriminator logits to a scaled style reward -log(1 - D)."""
        with torch.no_grad():
            logits = self._eval_disc(amp_obs)
            prob = torch.sigmoid(logits)
            # Clamp to avoid log(0) when the discriminator saturates.
            rewards = -torch.log(torch.clamp(1 - prob, min=0.0001))
            rewards = rewards * self._disc_reward_scale
        return rewards
================================================
FILE: timechamber/ase/utils/common_agent.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import copy
from datetime import datetime
from gym import spaces
import numpy as np
import os
import time
import yaml
from rl_games.algos_torch import a2c_continuous
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import central_value
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common import a2c_common
from rl_games.common import datasets
from rl_games.common import schedulers
from rl_games.common import vecenv
import torch
from torch import optim
import timechamber.ase.utils.amp_datasets as amp_datasets
from timechamber.utils.utils import load_check, load_checkpoint
from tensorboardX import SummaryWriter
class CommonAgent(a2c_continuous.A2CAgent):
    """Shared PPO-style agent used by the AMP/ASE trainers.

    Reimplements parts of rl_games' A2CAgent so that the model, optimizer and
    dataset are built from this project's network builders, and so that
    next-step observations/values are recorded for GAE with explicit
    early-termination handling.
    """

    def __init__(self, base_name, params):
        # Deliberately skip a2c_continuous.A2CAgent.__init__: the model,
        # optimizer, value normalizer and dataset are constructed here.
        a2c_common.A2CBase.__init__(self, base_name, params)
        self.config = config = params['config']
        self._load_config_params(config)

        self.is_discrete = False
        self._setup_action_space()
        self.bounds_loss_coef = config.get('bounds_loss_coef', None)
        self.clip_actions = config.get('clip_actions', True)
        self._save_intermediate = config.get('save_intermediate', False)

        net_config = self._build_net_config()
        self.model = self.network.build(net_config)
        self.model.to(self.ppo_device)
        self.states = None

        self.init_rnn_from_model(self.model)

        self.last_lr = float(self.last_lr)
        self.optimizer = optim.Adam(self.model.parameters(), float(self.last_lr),
                                    eps=1e-08, weight_decay=self.weight_decay)

        if self.normalize_input:
            obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
            self.running_mean_std = RunningMeanStd(obs_shape).to(self.ppo_device)

        if self.has_central_value:
            cv_config = {
                'state_shape': torch_ext.shape_whc_to_cwh(self.state_shape),
                'value_size': self.value_size,
                'ppo_device': self.ppo_device,
                'num_agents': self.num_agents,
                'horizon_length': self.horizon_length,
                'num_actors': self.num_actors,
                'num_actions': self.actions_num,
                'seq_len': self.seq_len,
                'model': self.central_value_config['network'],
                'config': self.central_value_config,
                'writter': self.writer,  # (sic) rl_games expects the key 'writter'
                'multi_gpu': self.multi_gpu
            }
            self.central_value_net = central_value.CentralValueTrain(**cv_config).to(self.ppo_device)

        # BUGFIX: previously this ran before the central-value net was built,
        # so normalize_value + has_central_value raised AttributeError.
        if self.normalize_value:
            self.value_mean_std = self.central_value_net.model.value_mean_std \
                if self.has_central_value else self.model.value_mean_std

        self.use_experimental_cv = self.config.get('use_experimental_cv', True)
        self.dataset = amp_datasets.AMPDataset(self.batch_size, self.minibatch_size,
                                               self.is_discrete, self.is_rnn,
                                               self.ppo_device, self.seq_len)

        self.algo_observer.after_init(self)
        return

    def init_tensors(self):
        """Extend the rollout buffer with next-step obs/values needed for GAE."""
        super().init_tensors()
        self.experience_buffer.tensor_dict['next_obses'] = \
            torch.zeros_like(self.experience_buffer.tensor_dict['obses'])
        self.experience_buffer.tensor_dict['next_values'] = \
            torch.zeros_like(self.experience_buffer.tensor_dict['values'])
        self.tensor_list += ['next_obses']
        return

    def train(self):
        """Main training loop.

        Runs epochs until max_epochs is exceeded; rank 0 logs stats and saves
        checkpoints. Returns (last_mean_rewards, epoch_num).
        """
        self.init_tensors()
        self.last_mean_rewards = -100500
        total_time = 0
        self.frame = 0
        self.obs = self.env_reset()
        self.curr_frames = self.batch_size_envs

        model_output_file = os.path.join(self.nn_dir, self.config['name'])

        if self.multi_gpu:
            self.hvd.setup_algo(self)

        self._init_train()

        while True:
            epoch_num = self.update_epoch()
            train_info = self.train_epoch()

            sum_time = train_info['total_time']
            total_time += sum_time
            frame = self.frame
            if self.multi_gpu:
                self.hvd.sync_stats(self)

            # Logging and checkpointing happen on rank 0 only.
            if self.rank == 0:
                scaled_time = sum_time
                scaled_play_time = train_info['play_time']
                curr_frames = self.curr_frames
                self.frame += curr_frames
                if self.print_stats:
                    fps_step = curr_frames / scaled_play_time
                    fps_total = curr_frames / scaled_time
                    print(f'fps step: {fps_step:.1f} fps total: {fps_total:.1f}')

                self.writer.add_scalar('performance/total_fps', curr_frames / scaled_time, frame)
                self.writer.add_scalar('performance/step_fps', curr_frames / scaled_play_time, frame)
                self.writer.add_scalar('info/epochs', epoch_num, frame)
                self._log_train_info(train_info, frame)

                self.algo_observer.after_print_stats(frame, epoch_num, total_time)

                if self.game_rewards.current_size > 0:
                    mean_rewards = self._get_mean_rewards()
                    mean_lengths = self.game_lengths.get_mean()

                    for i in range(self.value_size):
                        self.writer.add_scalar('rewards{0}/frame'.format(i), mean_rewards[i], frame)
                        self.writer.add_scalar('rewards{0}/iter'.format(i), mean_rewards[i], epoch_num)
                        self.writer.add_scalar('rewards{0}/time'.format(i), mean_rewards[i], total_time)

                    self.writer.add_scalar('episode_lengths/frame', mean_lengths, frame)
                    self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num)

                if self.has_self_play_config:
                    self.self_play_manager.update(self)

                if self.save_freq > 0:
                    if (epoch_num % self.save_freq == 0):
                        self.save(model_output_file)
                        if (self._save_intermediate):
                            int_model_output_file = model_output_file + '_' + str(epoch_num).zfill(8)
                            self.save(int_model_output_file)

                if epoch_num > self.max_epochs:
                    self.save(model_output_file)
                    print('MAX EPOCHS NUM!')
                    return self.last_mean_rewards, epoch_num

    def set_full_state_weights(self, weights):
        """Restore model, optimizer, frame counters and env state from a checkpoint dict."""
        self.set_weights(weights)

        self.epoch_num = weights['epoch']
        if self.has_central_value:
            self.central_value_net.load_state_dict(weights['assymetric_vf_nets'])
        self.optimizer.load_state_dict(weights['optimizer'])
        self.frame = weights.get('frame', 0)
        self.last_mean_rewards = weights.get('last_mean_rewards', -100500)

        if self.vec_env is not None:
            env_state = weights.get('env_state', None)
            self.vec_env.set_env_state(env_state)
        return

    def restore(self, fn):
        """Load a checkpoint file, tolerating missing normalizer entries."""
        checkpoint = load_checkpoint(fn, device=self.device)
        checkpoint = load_check(checkpoint=checkpoint,
                                normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        self.set_full_state_weights(checkpoint)

    def train_epoch(self):
        """Collect one rollout and run mini-epochs of PPO updates.

        Returns a dict of per-minibatch metric lists plus timing entries.
        """
        play_time_start = time.time()
        with torch.no_grad():
            if self.is_rnn:
                batch_dict = self.play_steps_rnn()
            else:
                batch_dict = self.play_steps()
        play_time_end = time.time()

        update_time_start = time.time()
        rnn_masks = batch_dict.get('rnn_masks', None)

        self.set_train()

        self.curr_frames = batch_dict.pop('played_frames')
        self.prepare_dataset(batch_dict)
        self.algo_observer.after_steps()

        if self.has_central_value:
            self.train_central_value()

        train_info = None

        if self.is_rnn:
            frames_mask_ratio = rnn_masks.sum().item() / (rnn_masks.nelement())
            print(frames_mask_ratio)

        for _ in range(0, self.mini_epochs_num):
            for i in range(len(self.dataset)):
                curr_train_info = self.train_actor_critic(self.dataset[i])

                if self.schedule_type == 'legacy':
                    if self.multi_gpu:
                        curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls')
                    self.last_lr, self.entropy_coef = self.scheduler.update(
                        self.last_lr, self.entropy_coef, self.epoch_num, 0, curr_train_info['kl'].item())
                    self.update_lr(self.last_lr)

                # Accumulate every metric into per-key lists.
                if (train_info is None):
                    train_info = dict()
                    for k, v in curr_train_info.items():
                        train_info[k] = [v]
                else:
                    for k, v in curr_train_info.items():
                        train_info[k].append(v)

            av_kls = torch_ext.mean_list(train_info['kl'])

            if self.schedule_type == 'standard':
                if self.multi_gpu:
                    av_kls = self.hvd.average_value(av_kls, 'ep_kls')
                self.last_lr, self.entropy_coef = self.scheduler.update(
                    self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item())
                self.update_lr(self.last_lr)

        if self.schedule_type == 'standard_epoch':
            if self.multi_gpu:
                # BUGFIX: the original referenced the undefined name `kls`
                # here (NameError); use the last mini-epoch's mean KL instead.
                av_kls = self.hvd.average_value(av_kls, 'ep_kls')
            self.last_lr, self.entropy_coef = self.scheduler.update(
                self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item())
            self.update_lr(self.last_lr)

        update_time_end = time.time()
        play_time = play_time_end - play_time_start
        update_time = update_time_end - update_time_start
        total_time = update_time_end - play_time_start

        train_info['step_time'] = batch_dict['step_time']
        train_info['play_time'] = play_time
        train_info['update_time'] = update_time
        train_info['total_time'] = total_time
        self._record_train_batch_info(batch_dict, train_info)

        return train_info

    def play_steps(self):
        """Roll out horizon_length steps, recording next-step values for GAE."""
        self.set_eval()

        done_indices = []
        update_list = self.update_list

        for n in range(self.horizon_length):
            # Reset only the environments that finished on the previous step.
            self.obs = self.env_reset(done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])

            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                res_dict = self.get_action_values(self.obs)

            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])

            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])

            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
            shaped_rewards = self.rewards_shaper(rewards)
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)

            # Zero the bootstrap value on true terminations (not timeouts).
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            next_vals = self._eval_critic(self.obs)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)

            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            done_indices = all_done_indices[::self.num_agents]

            self.game_rewards.update(self.current_rewards[done_indices])
            self.game_lengths.update(self.current_lengths[done_indices])
            self.algo_observer.process_infos(infos, done_indices)

            not_dones = 1.0 - self.dones.float()

            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones

            done_indices = done_indices[:, 0]

        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values

        batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size

        return batch_dict

    def prepare_dataset(self, batch_dict):
        """Normalize values/returns and load rollout tensors into the dataset."""
        obses = batch_dict['obses']
        returns = batch_dict['returns']
        values = batch_dict['values']
        actions = batch_dict['actions']
        neglogpacs = batch_dict['neglogpacs']
        mus = batch_dict['mus']
        sigmas = batch_dict['sigmas']
        rnn_states = batch_dict.get('rnn_states', None)
        rnn_masks = batch_dict.get('rnn_masks', None)

        advantages = self._calc_advs(batch_dict)

        if self.normalize_value:
            # Update running stats once per epoch, then freeze for transforms.
            self.value_mean_std.train()
            values = self.value_mean_std(values)
            returns = self.value_mean_std(returns)
            self.value_mean_std.eval()

        dataset_dict = {}
        dataset_dict['old_values'] = values
        dataset_dict['old_logp_actions'] = neglogpacs
        dataset_dict['advantages'] = advantages
        dataset_dict['returns'] = returns
        dataset_dict['actions'] = actions
        dataset_dict['obs'] = obses
        dataset_dict['rnn_states'] = rnn_states
        dataset_dict['rnn_masks'] = rnn_masks
        dataset_dict['mu'] = mus
        dataset_dict['sigma'] = sigmas

        self.dataset.update_values_dict(dataset_dict)

        if self.has_central_value:
            dataset_dict = {}
            dataset_dict['old_values'] = values
            dataset_dict['advantages'] = advantages
            dataset_dict['returns'] = returns
            dataset_dict['actions'] = actions
            dataset_dict['obs'] = batch_dict['states']
            dataset_dict['rnn_masks'] = rnn_masks
            self.central_value_net.update_dataset(dataset_dict)

        return

    def calc_gradients(self, input_dict):
        """Compute PPO losses on one minibatch and take an optimizer step.

        Stores the resulting metrics in self.train_result.
        """
        self.set_train()

        value_preds_batch = input_dict['old_values']
        old_action_log_probs_batch = input_dict['old_logp_actions']
        advantage = input_dict['advantages']
        old_mu_batch = input_dict['mu']
        old_sigma_batch = input_dict['sigma']
        return_batch = input_dict['returns']
        actions_batch = input_dict['actions']
        obs_batch = input_dict['obs']
        obs_batch = self._preproc_obs(obs_batch)

        lr_mul = 1.0
        curr_e_clip = lr_mul * self.e_clip

        batch_dict = {
            'is_train': True,
            'prev_actions': actions_batch,
            'obs': obs_batch
        }

        rnn_masks = None
        if self.is_rnn:
            rnn_masks = input_dict['rnn_masks']
            batch_dict['rnn_states'] = input_dict['rnn_states']
            batch_dict['seq_length'] = self.seq_len

        with torch.cuda.amp.autocast(enabled=self.mixed_precision):
            res_dict = self.model(batch_dict)
            action_log_probs = res_dict['prev_neglogp']
            values = res_dict['values']
            entropy = res_dict['entropy']
            mu = res_dict['mus']
            sigma = res_dict['sigmas']

            a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip)
            a_loss = a_info['actor_loss']

            c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value)
            c_loss = c_info['critic_loss']

            b_loss = self.bound_loss(mu)

            a_loss = torch.mean(a_loss)
            c_loss = torch.mean(c_loss)
            b_loss = torch.mean(b_loss)
            entropy = torch.mean(entropy)

            loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy \
                + self.bounds_loss_coef * b_loss

            a_clip_frac = torch.mean(a_info['actor_clipped'].float())

            a_info['actor_loss'] = a_loss
            a_info['actor_clip_frac'] = a_clip_frac

            if self.multi_gpu:
                self.optimizer.zero_grad()
            else:
                # Cheaper than zero_grad(): drop grads so the next backward allocates fresh ones.
                for param in self.model.parameters():
                    param.grad = None

        self.scaler.scale(loss).backward()
        self.scaler.step(self.optimizer)
        self.scaler.update()

        with torch.no_grad():
            reduce_kl = not self.is_rnn
            kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl)

        self.train_result = {
            'entropy': entropy,
            'kl': kl_dist,
            'last_lr': self.last_lr,
            'lr_mul': lr_mul,
            'b_loss': b_loss
        }
        self.train_result.update(a_info)
        self.train_result.update(c_info)

        return

    def discount_values(self, mb_fdones, mb_values, mb_rewards, mb_next_values):
        """GAE(lambda) using stored next-step values (already zeroed on termination)."""
        lastgaelam = 0
        mb_advs = torch.zeros_like(mb_rewards)

        for t in reversed(range(self.horizon_length)):
            not_done = 1.0 - mb_fdones[t]
            not_done = not_done.unsqueeze(1)
            delta = mb_rewards[t] + self.gamma * mb_next_values[t] - mb_values[t]
            lastgaelam = delta + self.gamma * self.tau * not_done * lastgaelam
            mb_advs[t] = lastgaelam

        return mb_advs

    def env_reset(self, env_ids=None):
        """Reset the given environments (or all when env_ids is None)."""
        obs = self.vec_env.reset(env_ids)
        obs = self.obs_to_tensors(obs)
        return obs

    def bound_loss(self, mu):
        """Quadratic penalty for action means outside [-1, 1]; 0 if disabled."""
        if self.bounds_loss_coef is not None:
            soft_bound = 1.0
            mu_loss_high = torch.clamp_min(mu - soft_bound, 0.0) ** 2
            mu_loss_low = torch.clamp_max(mu + soft_bound, 0.0) ** 2
            b_loss = (mu_loss_low + mu_loss_high).sum(axis=-1)
        else:
            b_loss = 0
        return b_loss

    def _get_mean_rewards(self):
        return self.game_rewards.get_mean()

    def _load_config_params(self, config):
        self.last_lr = config['learning_rate']
        return

    def _build_net_config(self):
        """Assemble the kwargs dict consumed by the network builder."""
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        config = {
            'actions_num': self.actions_num,
            'input_shape': obs_shape,
            'num_seqs': self.num_actors * self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        return config

    def _setup_action_space(self):
        action_space = self.env_info['action_space']
        self.actions_num = action_space.shape[0]

        # todo introduce device instead of cuda()
        self.actions_low = torch.from_numpy(action_space.low.copy()).float().to(self.ppo_device)
        self.actions_high = torch.from_numpy(action_space.high.copy()).float().to(self.ppo_device)
        return

    def _init_train(self):
        # Hook for subclasses; no-op here.
        return

    def _eval_critic(self, obs_dict):
        """Evaluate the critic on preprocessed observations (model in eval mode)."""
        self.model.eval()
        obs = obs_dict['obs']
        processed_obs = self._preproc_obs(obs)
        value = self.model.eval_critic(processed_obs)
        return value

    def _actor_loss(self, old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip):
        """Clipped PPO surrogate loss; also reports the per-sample clip mask."""
        ratio = torch.exp(old_action_log_probs_batch - action_log_probs)
        surr1 = advantage * ratio
        surr2 = advantage * torch.clamp(ratio, 1.0 - curr_e_clip,
                                        1.0 + curr_e_clip)
        a_loss = torch.max(-surr1, -surr2)

        clipped = torch.abs(ratio - 1.0) > curr_e_clip
        clipped = clipped.detach()

        info = {
            'actor_loss': a_loss,
            'actor_clipped': clipped
        }
        return info

    def _critic_loss(self, value_preds_batch, values, curr_e_clip, return_batch, clip_value):
        """Squared-error value loss, optionally with PPO-style value clipping."""
        if clip_value:
            value_pred_clipped = value_preds_batch + \
                (values - value_preds_batch).clamp(-curr_e_clip, curr_e_clip)
            value_losses = (values - return_batch) ** 2
            value_losses_clipped = (value_pred_clipped - return_batch) ** 2
            c_loss = torch.max(value_losses, value_losses_clipped)
        else:
            c_loss = (return_batch - values) ** 2

        info = {
            'critic_loss': c_loss
        }
        return info

    def _calc_advs(self, batch_dict):
        """Advantages = returns - values, summed over the value dim, optionally standardized."""
        returns = batch_dict['returns']
        values = batch_dict['values']

        advantages = returns - values
        advantages = torch.sum(advantages, axis=1)
        if self.normalize_advantage:
            advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

        return advantages

    def _record_train_batch_info(self, batch_dict, train_info):
        # Hook for subclasses (e.g. AMP adds discriminator stats); no-op here.
        return

    def _log_train_info(self, train_info, frame):
        """Write the averaged per-minibatch metrics to TensorBoard."""
        self.writer.add_scalar('performance/update_time', train_info['update_time'], frame)
        self.writer.add_scalar('performance/play_time', train_info['play_time'], frame)
        self.writer.add_scalar('losses/a_loss', torch_ext.mean_list(train_info['actor_loss']).item(), frame)
        self.writer.add_scalar('losses/c_loss', torch_ext.mean_list(train_info['critic_loss']).item(), frame)

        self.writer.add_scalar('losses/bounds_loss', torch_ext.mean_list(train_info['b_loss']).item(), frame)
        self.writer.add_scalar('losses/entropy', torch_ext.mean_list(train_info['entropy']).item(), frame)
        self.writer.add_scalar('info/last_lr', train_info['last_lr'][-1] * train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/lr_mul', train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/e_clip', self.e_clip * train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/clip_frac', torch_ext.mean_list(train_info['actor_clip_frac']).item(), frame)
        self.writer.add_scalar('info/kl', torch_ext.mean_list(train_info['kl']).item(), frame)
        return
================================================
FILE: timechamber/ase/utils/common_player.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch
from rl_games.algos_torch import players
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common.player import BasePlayer
from timechamber.utils.utils import load_check, load_checkpoint
import numpy as np
class CommonPlayer(players.PpoPlayerContinuous):
    """Shared inference player for this project's PPO/AMP policies.

    Builds the network from the project's net config instead of the rl_games
    default, and runs evaluation episodes while printing reward statistics.
    """

    def __init__(self, params):
        config = params['config']
        # Call BasePlayer.__init__ directly (not PpoPlayerContinuous) so the
        # model can be built from this project's net config below.
        BasePlayer.__init__(self, params)
        self.network = config['network']

        self._setup_action_space()
        self.mask = [False]

        self.normalize_input = self.config['normalize_input']
        self.normalize_value = self.config.get('normalize_value', False)

        net_config = self._build_net_config()
        self._build_net(net_config)

        return

    def run(self):
        """Play games_num * n_game_life games and print reward/step stats."""
        # BUGFIX: `time` was never imported at module level, so rendering
        # raised NameError at time.sleep(); import locally instead.
        import time

        n_games = self.games_num
        render = self.render_env
        n_game_life = self.n_game_life
        is_determenistic = self.is_determenistic
        sum_rewards = 0
        sum_steps = 0
        sum_game_res = 0
        n_games = n_games * n_game_life
        games_played = 0
        has_masks = False
        has_masks_func = getattr(self.env, "has_action_mask", None) is not None

        if has_masks_func:
            has_masks = self.env.has_action_mask()

        need_init_rnn = self.is_rnn
        for _ in range(n_games):
            if games_played >= n_games:
                break

            obs_dict = self.env_reset()
            batch_size = 1
            batch_size = self.get_batch_size(obs_dict['obs'], batch_size)

            if need_init_rnn:
                self.init_rnn()
                need_init_rnn = False

            cr = torch.zeros(batch_size, dtype=torch.float32, device=self.device)
            steps = torch.zeros(batch_size, dtype=torch.float32, device=self.device)

            print_game_res = False

            done_indices = []

            for n in range(self.max_steps):
                if has_masks:
                    masks = self.env.get_action_mask()
                    action = self.get_masked_action(obs_dict, masks, is_determenistic)
                else:
                    action = self.get_action(obs_dict, is_determenistic)
                obs_dict, r, done, info = self.env_step(self.env, action)
                obs_dict = {'obs': obs_dict}
                cr += r
                steps += 1

                self._post_step(info)

                if render:
                    self.env.render(mode='human')
                    time.sleep(self.render_sleep)

                all_done_indices = done.nonzero(as_tuple=False)
                # One entry per game: envs come in groups of num_agents.
                done_indices = all_done_indices[::self.num_agents]
                done_count = len(done_indices)
                games_played += done_count

                if done_count > 0:
                    if self.is_rnn:
                        for s in self.states:
                            s[:, all_done_indices, :] = s[:, all_done_indices, :] * 0.0

                    cur_rewards = cr[done_indices].sum().item()
                    cur_steps = steps[done_indices].sum().item()

                    cr = cr * (1.0 - done.float())
                    steps = steps * (1.0 - done.float())
                    sum_rewards += cur_rewards
                    sum_steps += cur_steps

                    game_res = 0.0
                    if isinstance(info, dict):
                        if 'battle_won' in info:
                            print_game_res = True
                            game_res = info.get('battle_won', 0.5)
                        if 'scores' in info:
                            print_game_res = True
                            game_res = info.get('scores', 0.5)
                    if self.print_stats:
                        if print_game_res:
                            print('reward:', cur_rewards / done_count,
                                  'steps:', cur_steps / done_count, 'w:', game_res)
                        else:
                            print('reward:', cur_rewards / done_count,
                                  'steps:', cur_steps / done_count)

                    sum_game_res += game_res
                    if batch_size // self.num_agents == 1 or games_played >= n_games:
                        break

                done_indices = done_indices[:, 0]

        print(sum_rewards)
        if print_game_res:
            print('av reward:', sum_rewards / games_played * n_game_life,
                  'av steps:', sum_steps / games_played * n_game_life,
                  'winrate:', sum_game_res / games_played * n_game_life)
        else:
            print('av reward:', sum_rewards / games_played * n_game_life,
                  'av steps:', sum_steps / games_played * n_game_life)

        return

    def get_action(self, obs_dict, is_determenistic=False):
        """Unwrap the obs dict before delegating to the rl_games player."""
        output = super().get_action(obs_dict['obs'], is_determenistic)
        return output

    def env_step(self, env, actions):
        """Step the env, converting numpy results to torch tensors when needed."""
        if not self.is_tensor_obses:
            actions = actions.cpu().numpy()

        obs, rewards, dones, infos = env.step(actions)

        if hasattr(obs, 'dtype') and obs.dtype == np.float64:
            obs = np.float32(obs)
        if self.value_size > 1:
            rewards = rewards[0]
        if self.is_tensor_obses:
            return obs, rewards.to(self.device), dones.to(self.device), infos
        else:
            if np.isscalar(dones):
                rewards = np.expand_dims(np.asarray(rewards), 0)
                dones = np.expand_dims(np.asarray(dones), 0)
            return self.obs_to_torch(obs), torch.from_numpy(rewards), torch.from_numpy(dones), infos

    def _build_net(self, config):
        """Build the model in eval mode plus the optional input normalizer."""
        self.model = self.network.build(config)
        self.model.to(self.device)
        self.model.eval()
        self.is_rnn = self.model.is_rnn()
        if self.normalize_input:
            obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
            self.running_mean_std = RunningMeanStd(obs_shape).to(self.device)
            self.running_mean_std.eval()
        return

    def env_reset(self, env_ids=None):
        obs = self.env.reset(env_ids)
        return self.obs_to_torch(obs)

    def _post_step(self, info):
        # Hook for subclasses (e.g. AMP debug output); no-op here.
        return

    def _build_net_config(self):
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        config = {
            'actions_num': self.actions_num,
            'input_shape': obs_shape,
            'num_seqs': self.num_agents,
            'normalize_input': self.normalize_input,
            'normalize_value': self.normalize_value,
        }
        return config

    def restore(self, fn):
        """Load model weights (and the model's input normalizer, if saved)."""
        checkpoint = load_checkpoint(fn, device=self.device)
        checkpoint = load_check(checkpoint=checkpoint,
                                normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        self.model.load_state_dict(checkpoint['model'])
        if self.normalize_input and 'running_mean_std' in checkpoint:
            # NOTE(review): this loads into the model's own normalizer, not
            # self.running_mean_std built in _build_net — confirm intended.
            self.model.running_mean_std.load_state_dict(checkpoint['running_mean_std'])

    def _setup_action_space(self):
        self.actions_num = self.action_space.shape[0]
        self.actions_low = torch.from_numpy(self.action_space.low.copy()).float().to(self.device)
        self.actions_high = torch.from_numpy(self.action_space.high.copy()).float().to(self.device)
        return
================================================
FILE: timechamber/ase/utils/replay_buffer.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch
class ReplayBuffer():
    """Fixed-capacity ring buffer of tensors with shuffled sampling.

    Tensors are stored per-key in preallocated buffers; writes wrap around
    the head pointer, and sampling walks a shuffled index permutation that
    is re-randomized once exhausted.
    """

    def __init__(self, buffer_size, device):
        self._head = 0
        self._total_count = 0
        self._buffer_size = buffer_size
        self._device = device
        self._data_buf = None
        self._sample_idx = torch.randperm(buffer_size)
        self._sample_head = 0
        return

    def reset(self):
        """Logically empty the buffer (storage is kept and overwritten)."""
        self._head = 0
        self._total_count = 0
        self._reset_sample_idx()
        return

    def get_buffer_size(self):
        return self._buffer_size

    def get_total_count(self):
        """Total number of entries ever stored (may exceed capacity)."""
        return self._total_count

    def store(self, data_dict):
        """Append a batch of tensors, wrapping around when the buffer is full.

        Every value in data_dict must share the same leading batch size,
        which may not exceed the buffer capacity.
        """
        if self._data_buf is None:
            self._init_data_buf(data_dict)

        batch_n = next(iter(data_dict.values())).shape[0]
        capacity = self.get_buffer_size()
        assert (batch_n <= capacity)

        for key, storage in self._data_buf.items():
            incoming = data_dict[key]
            assert (incoming.shape[0] == batch_n)
            # Write up to the end of the buffer, then wrap the remainder.
            first_part = min(batch_n, capacity - self._head)
            storage[self._head:self._head + first_part] = incoming[:first_part]
            wrapped = batch_n - first_part
            if wrapped > 0:
                storage[0:wrapped] = incoming[first_part:]

        self._head = (self._head + batch_n) % capacity
        self._total_count += batch_n

        return

    def sample(self, n):
        """Return a dict of n entries drawn via the shuffled index walk."""
        capacity = self.get_buffer_size()
        walk = torch.arange(self._sample_head, self._sample_head + n) % capacity
        rand_idx = self._sample_idx[walk]
        if self.get_total_count() < capacity:
            # Buffer not yet full: restrict indices to the written region.
            rand_idx = rand_idx % self._head

        samples = {key: storage[rand_idx] for key, storage in self._data_buf.items()}

        self._sample_head += n
        if self._sample_head >= capacity:
            self._reset_sample_idx()

        return samples

    def _reset_sample_idx(self):
        """Reshuffle the sampling permutation and restart the walk."""
        self._sample_idx[:] = torch.randperm(self.get_buffer_size())
        self._sample_head = 0
        return

    def _init_data_buf(self, data_dict):
        """Lazily allocate one zero tensor per key, shaped (capacity, *item_shape)."""
        capacity = self.get_buffer_size()
        self._data_buf = {
            key: torch.zeros((capacity,) + value.shape[1:], device=self._device)
            for key, value in data_dict.items()
        }
        return
================================================
FILE: timechamber/cfg/config.yaml
================================================
# Task name - used to pick the class to load
task_name: ${task.name}
# experiment name. defaults to name of training config
experiment: ''
# if set to positive integer, overrides the default number of environments
num_envs: ''
# seed - set to -1 to choose random seed
seed: 42
# set to True for deterministic performance
torch_deterministic: False
# set the maximum number of learning iterations to train for. overrides default per-environment setting
max_iterations: ''
# set minibatch_size
minibatch_size: 32768
## Device config
# 'physx' or 'flex'
physics_engine: 'physx'
# whether to use cpu or gpu pipeline
pipeline: 'gpu'
use_gpu: True
use_gpu_pipeline: True
# device for running physics simulation
sim_device: 'cuda:0'
# device to run RL
rl_device: 'cuda:0'
graphics_device_id: 0
device_type: cuda
## PhysX arguments
num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only.
solver_type: 1 # 0: pgs, 1: tgs
num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread
# RLGames Arguments
# test - if set, run policy in inference mode (requires setting checkpoint to load)
test: False
# used to set checkpoint path
checkpoint: ''
op_checkpoint: ''
player_pool_type: ''
num_agents: 2
# HRL Arguments
motion_file: 'tasks/data/motions/reallusion_sword_shield/RL_Avatar_Idle_Ready_Motion.npy'
# set to True to use multi-gpu horovod training
multi_gpu: False
wandb_activate: False
wandb_group: ''
wandb_name: ${train.params.config.name}
wandb_entity: ''
wandb_project: 'timechamber'
capture_video: False
capture_video_freq: 1464
capture_video_len: 100
force_render: True
# disables rendering
headless: True
# set default task and default training config based on task
defaults:
- task: MA_Humanoid_Strike
- train: ${task}HRL
- hydra/job_logging: disabled
# set the directory where the output files get saved
hydra:
output_subdir: null
run:
dir: .
================================================
FILE: timechamber/cfg/task/MA_Ant_Battle.yaml
================================================
# used to create the object
name: MA_Ant_Battle
physics_engine: ${..physics_engine}
# if given, will override the device setting in gym.
env:
# numEnvs: ${...num_envs}
numEnvs: ${resolve_default:4096,${...num_envs}}
numAgents: ${...num_agents}
# rgb color of Ant body
color: [ [ 0.97, 0.38, 0.06 ],[ 0.24, 0.38, 0.06 ],[ 0.56, 0.85, 0.25 ],[ 0.56, 0.85, 0.25 ],[ 0.14, 0.97, 0.24 ],[ 0.63, 0.2, 0.87 ] ]
envSpacing: 6
borderlineSpace: 3
episodeLength: 1000
enableDebugVis: False
controlFrequencyInv: 1
clipActions: 1.0
clipObservations: 5.0
actionScale: 0.5
control:
# PD Drive parameters:
stiffness: 85.0 # [N*m/rad]
damping: 2.0 # [N*m*s/rad]
actionScale: 0.5
controlFrequencyInv: 1 # 60 Hz
# reward parameters
headingWeight: 0.5
upWeight: 0.1
# cost parameters
terminationHeight: 0.31
dofVelocityScale: 0.2
jointsAtLimitCost: -0.1
plane:
staticFriction: 1.0
dynamicFriction: 1.0
restitution: 0.0
asset:
assetFileName: "mjcf/nv_ant.xml"
# set to True if you use camera sensors in the environment
enableCameraSensors: False
sim:
dt: 0.0166 # 1/60 s
substeps: 2
up_axis: "z"
use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
gravity: [ 0.0, 0.0, -9.81 ]
physx:
num_threads: ${....num_threads}
solver_type: ${....solver_type}
use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU
num_position_iterations: 4
num_velocity_iterations: 0
contact_offset: 0.02
rest_offset: 0.0
bounce_threshold_velocity: 0.2
max_depenetration_velocity: 10.0
default_buffer_size_multiplier: 5.0
max_gpu_contact_pairs: 8388608 # 8*1024*1024
num_subscenes: ${....num_subscenes}
contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (default - all contacts)
task:
randomize: False
randomization_params:
# specify which attributes to randomize for each actor type and property
frequency: 600 # Define how many environment steps between generating new randomizations
observations:
range: [ 0, .002 ] # range for the white noise
operation: "additive"
distribution: "gaussian"
actions:
range: [ 0., .02 ]
operation: "additive"
distribution: "gaussian"
actor_params:
ant:
color: True
rigid_body_properties:
mass:
range: [ 0.5, 1.5 ]
operation: "scaling"
distribution: "uniform"
setup_only: True # Property will only be randomized once before simulation is started. See Domain Randomization Documentation for more info.
dof_properties:
damping:
range: [ 0.5, 1.5 ]
operation: "scaling"
distribution: "uniform"
stiffness:
range: [ 0.5, 1.5 ]
operation: "scaling"
distribution: "uniform"
lower:
range: [ 0, 0.01 ]
operation: "additive"
distribution: "gaussian"
upper:
range: [ 0, 0.01 ]
operation: "additive"
distribution: "gaussian"
================================================
FILE: timechamber/cfg/task/MA_Ant_Sumo.yaml
================================================
# used to create the object
name: MA_Ant_Sumo
physics_engine: ${..physics_engine}
# if given, will override the device setting in gym.
env:
# numEnvs: ${...num_envs}
numEnvs: ${resolve_default:4096,${...num_envs}}
numAgents: ${...num_agents}
envSpacing: 6
borderlineSpace: 3
episodeLength: 1000
enableDebugVis: False
controlFrequencyInv: 1
clipActions: 1.0
clipObservations: 5.0
actionScale: 0.5
control:
# PD Drive parameters:
stiffness: 85.0 # [N*m/rad]
damping: 2.0 # [N*m*s/rad]
actionScale: 0.5
controlFrequencyInv: 1 # 60 Hz
# reward parameters
headingWeight: 0.5
upWeight: 0.1
# cost parameters
terminationHeight: 0.31
dofVelocityScale: 0.2
jointsAtLimitCost: -0.1
plane:
staticFriction: 1.0
dynamicFriction: 1.0
restitution: 0.0
asset:
assetFileName: "mjcf/nv_ant.xml"
# set to True if you use camera sensors in the environment
enableCameraSensors: False
sim:
dt: 0.0166 # 1/60 s
substeps: 2
up_axis: "z"
use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
gravity: [0.0, 0.0, -9.81]
physx:
num_threads: ${....num_threads}
solver_type: ${....solver_type}
use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU
num_position_iterations: 4
num_velocity_iterations: 0
contact_offset: 0.02
rest_offset: 0.0
bounce_threshold_velocity: 0.2
max_depenetration_velocity: 10.0
default_buffer_size_multiplier: 5.0
max_gpu_contact_pairs: 8388608 # 8*1024*1024
num_subscenes: ${....num_subscenes}
contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (default - all contacts)
task:
randomize: False
randomization_params:
# specify which attributes to randomize for each actor type and property
frequency: 600 # Define how many environment steps between generating new randomizations
observations:
range: [0, .002] # range for the white noise
operation: "additive"
distribution: "gaussian"
actions:
range: [0., .02]
operation: "additive"
distribution: "gaussian"
actor_params:
ant:
color: True
rigid_body_properties:
mass:
range: [0.5, 1.5]
operation: "scaling"
distribution: "uniform"
setup_only: True # Property will only be randomized once before simulation is started. See Domain Randomization Documentation for more info.
dof_properties:
damping:
range: [0.5, 1.5]
operation: "scaling"
distribution: "uniform"
stiffness:
range: [0.5, 1.5]
operation: "scaling"
distribution: "uniform"
lower:
range: [0, 0.01]
operation: "additive"
distribution: "gaussian"
upper:
range: [0, 0.01]
operation: "additive"
distribution: "gaussian"
================================================
FILE: timechamber/cfg/task/MA_Humanoid_Strike.yaml
================================================
name: MA_Humanoid_Strike
physics_engine: ${..physics_engine}
# if given, will override the device setting in gym.
env:
numEnvs: ${resolve_default:4096,${...num_envs}}
envSpacing: 6
episodeLength: 1500
borderlineSpace: 3.0
numAgents: 2
isFlagrun: False
enableDebugVis: False
pdControl: True
powerScale: 1.0
controlFrequencyInv: 2 # 30 Hz
stateInit: "Default"
hybridInitProb: 0.5
numAMPObsSteps: 10
localRootObs: True
keyBodies: ["right_hand", "left_hand", "right_foot", "left_foot", "sword", "shield"]
contactBodies: ["right_foot", "left_foot"]
# forceBodies: ["torso", "right_upper_arm", "right_thigh", "right_shin", "left_thigh", "left_shin"]
forceBodies: ["torso", "right_thigh", "right_shin", "left_thigh", "left_shin"]
terminationHeight: 0.15
enableEarlyTermination: True
strikeBodyNames: ["sword", "shield", "right_hand", "right_lower_arm", "left_hand", "left_lower_arm"]
enableTaskObs: True
asset:
assetRoot: "tasks/data/assets"
assetFileName: "mjcf/amp_humanoid_sword_shield.xml"
plane:
staticFriction: 1.0
dynamicFriction: 1.0
restitution: 0.0
sim:
substeps: 2
physx:
num_threads: 4
solver_type: 1 # 0: pgs, 1: tgs
num_position_iterations: 4
num_velocity_iterations: 0
contact_offset: 0.02
rest_offset: 0.0
bounce_threshold_velocity: 0.2
max_depenetration_velocity: 10.0
default_buffer_size_multiplier: 10.0
flex:
num_inner_iterations: 10
warm_start: 0.25
================================================
FILE: timechamber/cfg/train/MA_Ant_BattlePPO.yaml
================================================
params:
seed: ${...seed}
algo:
name: self_play_continuous
model:
name: continuous_a2c_logstd
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [ 256, 128, 64 ]
activation: elu
d2rl: False
initializer:
name: default
player_pool_type: ${...player_pool_type}
load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
load_path: ${...checkpoint} # path to the checkpoint to load
op_load_path: ${if:${...op_checkpoint},${...op_checkpoint},${...checkpoint}} # default play with myself
num_agents: ${...num_agents}
update_win_rate: 0.7
player_pool_length: 4
games_to_check: 400
max_update_steps: 5000
device: ${...rl_device}
config:
name: ${resolve_default:MA_Ant_1v1,${....experiment}}
env_name: rlgpu
multi_gpu: ${....multi_gpu}
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
value_bootstrap: True
num_actors: ${....task.env.numEnvs}
reward_shaper:
scale_value: 0.01
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 3e-4
lr_schedule: adaptive
schedule_type: standard
kl_threshold: 0.008
score_to_win: 20000
max_epochs: ${resolve_default:2000,${....max_iterations}}
save_best_after: 200
save_frequency: 1000
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 64
minibatch_size: ${resolve_default:32768,${....minibatch_size}}
mini_epochs: 4
critic_coef: 2
clip_value: True
use_smooth_clamp: True
bounds_loss_coef: 0.0000
player:
games_num: 4000
record_elo: True
init_elo: 400
================================================
FILE: timechamber/cfg/train/MA_Ant_SumoPPO.yaml
================================================
params:
seed: ${...seed}
algo:
name: self_play_continuous
model:
name: continuous_a2c_logstd
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [ 256, 128, 64 ]
activation: elu
d2rl: False
initializer:
name: default
# self play agent related
player_pool_type: ${...player_pool_type}
load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
load_path: ${...checkpoint} # path to the checkpoint to load
op_load_path: ${if:${...op_checkpoint},${...op_checkpoint},${...checkpoint}} # default play with myself
num_agents: ${...num_agents}
update_win_rate: 0.7
player_pool_length: 2
games_to_check: 400
max_update_steps: 5000
device: ${...rl_device}
config:
name: ${resolve_default:MA_Ant_1v1,${....experiment}}
env_name: rlgpu
multi_gpu: ${....multi_gpu}
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
value_bootstrap: True
num_actors: ${....task.env.numEnvs}
reward_shaper:
scale_value: 0.01
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 3e-4
lr_schedule: adaptive
schedule_type: standard
kl_threshold: 0.008
score_to_win: 20000
max_epochs: ${resolve_default:100000,${....max_iterations}}
save_best_after: 200
save_frequency: 500
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 64
minibatch_size: ${resolve_default:32768,${....minibatch_size}}
mini_epochs: 4
critic_coef: 2
clip_value: True
use_smooth_clamp: True
bounds_loss_coef: 0.0000
player:
games_num: 4000
record_elo: True
init_elo: 400
================================================
FILE: timechamber/cfg/train/MA_Humanoid_StrikeHRL.yaml
================================================
params:
seed: ${...seed}
algo:
name: self_play_hrl
model:
name: hrl
network:
name: hrl
separate: True
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: -2.3
fixed_sigma: True
learn_sigma: False
mlp:
units: [1024, 512]
activation: relu
d2rl: False
initializer:
name: default
regularizer:
name: None
# self play agent related
player_pool_type: ${...player_pool_type}
load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
load_path: ${...checkpoint} # path to the checkpoint to load
op_load_path: ${if:${...op_checkpoint},${...op_checkpoint},${...checkpoint}} # default play with myself
num_agents: ${...num_agents}
update_win_rate: 0.8
player_pool_length: 4
games_to_check: 400
max_update_steps: 5000
device: ${...rl_device}
config:
name: Humanoid
env_name: rlgpu
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: ${....task.env.numEnvs}
reward_shaper:
scale_value: 1
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 2e-5
lr_schedule: constant
score_to_win: 20000000
max_epochs: ${resolve_default:100000,${....max_iterations}}
save_best_after: 10
save_frequency: 50
print_stats: True
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: False
e_clip: 0.2
horizon_length: 64
minibatch_size: ${resolve_default:64,${....minibatch_size}}
mini_epochs: 6
critic_coef: 5
clip_value: False
seq_len: 4
bounds_loss_coef: 10
task_reward_w: 0.9
disc_reward_w: 0.1
player:
determenistic: False
games_num: 4000
record_elo: True
init_elo: 400
llc_steps: 5
llc_config: cfg/train/base/ase_humanoid_hrl.yaml
llc_checkpoint: tasks/data/models/llc_reallusion_sword_shield.pth
================================================
FILE: timechamber/cfg/train/base/ase_humanoid_hrl.yaml
================================================
params:
seed: -1
algo:
name: ase
model:
name: ase
network:
name: ase
separate: True
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: -2.9
fixed_sigma: True
learn_sigma: False
mlp:
units: [1024, 1024, 512]
activation: relu
d2rl: False
initializer:
name: default
regularizer:
name: None
disc:
units: [1024, 1024, 512]
activation: relu
initializer:
name: default
enc:
units: [1024, 512]
activation: relu
separate: False
initializer:
name: default
load_checkpoint: False
config:
name: Humanoid
env_name: rlgpu
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
reward_shaper:
scale_value: 1
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 2e-5
lr_schedule: constant
score_to_win: 20000
max_epochs: 100000
save_best_after: 50
save_frequency: 50
print_stats: True
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: False
    # (duplicate "ppo: True" key removed; already set above in this config)
e_clip: 0.2
horizon_length: 32
minibatch_size: 1
mini_epochs: 6
critic_coef: 5
clip_value: False
seq_len: 4
bounds_loss_coef: 10
amp_obs_demo_buffer_size: 200000
amp_replay_buffer_size: 200000
amp_replay_keep_prob: 0.01
amp_batch_size: 32
amp_minibatch_size: 1
disc_coef: 5
disc_logit_reg: 0.01
disc_grad_penalty: 5
disc_reward_scale: 2
disc_weight_decay: 0.0001
normalize_amp_input: True
enable_eps_greedy: False
latent_dim: 64
latent_steps_min: 1
latent_steps_max: 150
amp_latent_grad_bonus: 0.00
amp_latent_grad_bonus_max: 100.0
amp_diversity_bonus: 0.01
amp_diversity_tar: 1.0
enc_coef: 5
enc_weight_decay: 0.0000
enc_reward_scale: 1
enc_grad_penalty: 0
task_reward_w: 0.0
disc_reward_w: 0.5
enc_reward_w: 0.5
================================================
FILE: timechamber/learning/common_agent.py
================================================
# License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE]
import copy
from datetime import datetime
from gym import spaces
import numpy as np
import os
import time
import yaml
from rl_games.algos_torch import a2c_continuous
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import central_value
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common import a2c_common
from rl_games.common import datasets
from rl_games.common import schedulers
from rl_games.common import vecenv
import torch
from torch import optim
from tensorboardX import SummaryWriter
class CommonAgent(a2c_continuous.A2CAgent):
    """Shared PPO agent base for the self-play agents.

    Builds the model/optimizer from the rl-games ``params`` dict and
    implements the training loop, rollout collection (with bootstrapped
    next-state values) and the PPO gradient computation.

    Fixes vs. the previous revision:
    - ``torch.nn.utils.clip_grad_norm_`` is used (``nn`` was never imported,
      so gradient truncation raised NameError).
    - the ``standard_epoch`` LR-schedule branch averaged an undefined ``kls``;
      it now uses the already-computed ``av_kls``.
    - reward scalars are tagged ``rewards{0}/...`` so each value channel gets
      its own tensorboard tag (the format string had no placeholder).
    - leftover debug prints removed.
    """

    def __init__(self, base_name, params):
        a2c_common.A2CBase.__init__(self, base_name, params)

        config = params['config']
        self._load_config_params(config)

        self.is_discrete = False
        self._setup_action_space()
        self.bounds_loss_coef = config.get('bounds_loss_coef', None)
        self.clip_actions = config.get('clip_actions', True)

        # checkpoints are written under <network_path>/<name>/nn
        self.network_path = config.get('network_path', "./runs")
        self.network_path = os.path.join(self.network_path, self.config['name'])
        self.network_path = os.path.join(self.network_path, 'nn')

        net_config = self._build_net_config()
        self.model = self.network.build(net_config)
        self.model.to(self.ppo_device)
        self.states = None

        self.init_rnn_from_model(self.model)
        self.last_lr = float(self.last_lr)

        self.optimizer = optim.Adam(self.model.parameters(), float(self.last_lr),
                                    eps=1e-08, weight_decay=self.weight_decay)

        if self.has_central_value:
            cv_config = {
                'state_shape': torch_ext.shape_whc_to_cwh(self.state_shape),
                'value_size': self.value_size,
                'ppo_device': self.ppo_device,
                'num_agents': self.num_agents,
                'num_steps': self.horizon_length,
                'num_actors': self.num_actors,
                'num_actions': self.actions_num,
                'seq_len': self.seq_len,
                'model': self.central_value_config['network'],
                'config': self.central_value_config,
                'writter': self.writer,
                'multi_gpu': self.multi_gpu
            }
            self.central_value_net = central_value.CentralValueTrain(**cv_config).to(self.ppo_device)

        self.use_experimental_cv = self.config.get('use_experimental_cv', True)

        self.algo_observer.after_init(self)
        return

    def init_tensors(self):
        """Extend the experience buffer with next-step observations/values
        used for terminal-aware value bootstrapping."""
        super().init_tensors()
        self.experience_buffer.tensor_dict['next_obses'] = torch.zeros_like(
            self.experience_buffer.tensor_dict['obses'])
        self.experience_buffer.tensor_dict['next_values'] = torch.zeros_like(
            self.experience_buffer.tensor_dict['values'])
        self.tensor_list += ['next_obses']
        return

    def train(self):
        """Main training loop: run epochs until ``max_epochs`` is exceeded,
        logging stats and periodically saving checkpoints."""
        self.init_tensors()
        self.last_mean_rewards = -100500
        start_time = time.time()
        total_time = 0
        rep_count = 0
        self.frame = 0
        self.obs = self.env_reset()
        self.curr_frames = self.batch_size_envs

        self.model_output_file = os.path.join(self.network_path, self.config['name'])

        if self.multi_gpu:
            self.hvd.setup_algo(self)

        self._init_train()

        while True:
            epoch_num = self.update_epoch()
            train_info = self.train_epoch()

            sum_time = train_info['total_time']
            total_time += sum_time
            frame = self.frame
            if self.multi_gpu:
                self.hvd.sync_stats(self)

            # only rank 0 logs and saves
            if self.rank == 0:
                scaled_time = sum_time
                scaled_play_time = train_info['play_time']
                curr_frames = self.curr_frames
                self.frame += curr_frames
                if self.print_stats:
                    fps_step = curr_frames / scaled_play_time
                    fps_total = curr_frames / scaled_time
                    print(f'fps step: {fps_step:.1f} fps total: {fps_total:.1f}')

                self.writer.add_scalar('performance/total_fps', curr_frames / scaled_time, frame)
                self.writer.add_scalar('performance/step_fps', curr_frames / scaled_play_time, frame)
                self.writer.add_scalar('info/epochs', epoch_num, frame)
                self._log_train_info(train_info, frame)

                self.algo_observer.after_print_stats(frame, epoch_num, total_time)

                if self.game_rewards.current_size > 0:
                    mean_rewards = self.game_rewards.get_mean()
                    mean_lengths = self.game_lengths.get_mean()

                    # one tag per value channel (format placeholder was missing)
                    for i in range(self.value_size):
                        self.writer.add_scalar('rewards{0}/frame'.format(i), mean_rewards[i], frame)
                        self.writer.add_scalar('rewards{0}/iter'.format(i), mean_rewards[i], epoch_num)
                        self.writer.add_scalar('rewards{0}/time'.format(i), mean_rewards[i], total_time)

                    self.writer.add_scalar('episode_lengths/frame', mean_lengths, frame)
                    self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num)

                    if self.has_self_play_config:
                        self.self_play_manager.update(self)

                if self.save_freq > 0:
                    if (epoch_num % self.save_freq == 0):
                        self.save(self.model_output_file + "_" + str(epoch_num))

                if epoch_num > self.max_epochs:
                    self.save(self.model_output_file)
                    print('MAX EPOCHS NUM!')
                    return self.last_mean_rewards, epoch_num
        return

    def train_epoch(self):
        """Collect one rollout, then run ``mini_epochs_num`` PPO passes over
        the dataset. Returns a dict of per-minibatch training stats plus
        timing information."""
        play_time_start = time.time()
        with torch.no_grad():
            if self.is_rnn:
                batch_dict = self.play_steps_rnn()
            else:
                batch_dict = self.play_steps()

        play_time_end = time.time()
        update_time_start = time.time()
        rnn_masks = batch_dict.get('rnn_masks', None)

        self.set_train()

        self.curr_frames = batch_dict.pop('played_frames')
        self.prepare_dataset(batch_dict)
        self.algo_observer.after_steps()

        if self.has_central_value:
            self.train_central_value()

        train_info = None

        for _ in range(0, self.mini_epochs_num):
            ep_kls = []
            for i in range(len(self.dataset)):
                curr_train_info = self.train_actor_critic(self.dataset[i])

                if self.schedule_type == 'legacy':
                    if self.multi_gpu:
                        curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls')
                    self.last_lr, self.entropy_coef = self.scheduler.update(
                        self.last_lr, self.entropy_coef, self.epoch_num, 0, curr_train_info['kl'].item())
                    self.update_lr(self.last_lr)

                # accumulate per-minibatch stats into lists keyed by stat name
                if (train_info is None):
                    train_info = dict()
                    for k, v in curr_train_info.items():
                        train_info[k] = [v]
                else:
                    for k, v in curr_train_info.items():
                        train_info[k].append(v)

            av_kls = torch_ext.mean_list(train_info['kl'])

            if self.schedule_type == 'standard':
                if self.multi_gpu:
                    av_kls = self.hvd.average_value(av_kls, 'ep_kls')
                self.last_lr, self.entropy_coef = self.scheduler.update(
                    self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item())
                self.update_lr(self.last_lr)

        if self.schedule_type == 'standard_epoch':
            if self.multi_gpu:
                # was averaging an undefined `kls`; av_kls already holds the epoch mean
                av_kls = self.hvd.average_value(av_kls, 'ep_kls')
            self.last_lr, self.entropy_coef = self.scheduler.update(
                self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item())
            self.update_lr(self.last_lr)

        update_time_end = time.time()
        play_time = play_time_end - play_time_start
        update_time = update_time_end - update_time_start
        total_time = update_time_end - play_time_start

        train_info['play_time'] = play_time
        train_info['update_time'] = update_time
        train_info['total_time'] = total_time
        self._record_train_batch_info(batch_dict, train_info)

        return train_info

    def play_steps(self):
        """Roll the policy out for ``horizon_length`` steps, recording
        transitions plus terminal-masked next-state values, then compute
        GAE returns. Returns the flattened batch dict."""
        self.set_eval()

        epinfos = []
        update_list = self.update_list

        for n in range(self.horizon_length):
            self.obs, done_env_ids = self._env_reset_done()
            self.experience_buffer.update_data('obses', n, self.obs['obs'])

            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                res_dict = self.get_action_values(self.obs)

            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])

            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])

            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
            shaped_rewards = self.rewards_shaper(rewards)
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)

            # zero the bootstrap value on true terminations (not timeouts)
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            next_vals = self._eval_critic(self.obs)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)

            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            done_indices = all_done_indices[::self.num_agents]

            self.game_rewards.update(self.current_rewards[done_indices])
            self.game_lengths.update(self.current_lengths[done_indices])
            self.algo_observer.process_infos(infos, done_indices)

            not_dones = 1.0 - self.dones.float()
            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones

        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']

        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values

        batch_dict = self.experience_buffer.get_transformed_list(
            a2c_common.swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size

        return batch_dict

    def calc_gradients(self, input_dict):
        """Run one PPO optimization step on a minibatch and store the
        resulting stats in ``self.train_result``."""
        self.set_train()

        value_preds_batch = input_dict['old_values']
        old_action_log_probs_batch = input_dict['old_logp_actions']
        advantage = input_dict['advantages']
        old_mu_batch = input_dict['mu']
        old_sigma_batch = input_dict['sigma']
        return_batch = input_dict['returns']
        actions_batch = input_dict['actions']
        obs_batch = input_dict['obs']
        obs_batch = self._preproc_obs(obs_batch)

        lr = self.last_lr
        kl = 1.0
        lr_mul = 1.0
        curr_e_clip = lr_mul * self.e_clip

        batch_dict = {
            'is_train': True,
            'prev_actions': actions_batch,
            'obs': obs_batch
        }

        rnn_masks = None
        if self.is_rnn:
            rnn_masks = input_dict['rnn_masks']
            batch_dict['rnn_states'] = input_dict['rnn_states']
            batch_dict['seq_length'] = self.seq_len

        with torch.cuda.amp.autocast(enabled=self.mixed_precision):
            res_dict = self.model(batch_dict)
            action_log_probs = res_dict['prev_neglogp']
            values = res_dict['value']
            entropy = res_dict['entropy']
            mu = res_dict['mu']
            sigma = res_dict['sigma']

            a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip)
            a_loss = a_info['actor_loss']

            c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value)
            c_loss = c_info['critic_loss']

            b_loss = self.bound_loss(mu)

            losses, sum_mask = torch_ext.apply_masks(
                [a_loss.unsqueeze(1), c_loss, entropy.unsqueeze(1), b_loss.unsqueeze(1)], rnn_masks)
            a_loss, c_loss, entropy, b_loss = losses[0], losses[1], losses[2], losses[3]

            loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy \
                + self.bounds_loss_coef * b_loss

            if self.multi_gpu:
                self.optimizer.zero_grad()
            else:
                for param in self.model.parameters():
                    param.grad = None

        self.scaler.scale(loss).backward()
        if self.truncate_grads:
            if self.multi_gpu:
                self.optimizer.synchronize()
                self.scaler.unscale_(self.optimizer)
                # `nn` was never imported here; use the fully-qualified name
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
                with self.optimizer.skip_synchronize():
                    self.scaler.step(self.optimizer)
                    self.scaler.update()
            else:
                self.scaler.unscale_(self.optimizer)
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
                self.scaler.step(self.optimizer)
                self.scaler.update()
        else:
            self.scaler.step(self.optimizer)
            self.scaler.update()

        with torch.no_grad():
            reduce_kl = not self.is_rnn
            kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl)
            if self.is_rnn:
                kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel()  # / sum_mask

        self.train_result = {
            'entropy': entropy,
            'kl': kl_dist,
            'last_lr': self.last_lr,
            'lr_mul': lr_mul,
            'b_loss': b_loss
        }
        self.train_result.update(a_info)
        self.train_result.update(c_info)

        return

    def discount_values(self, mb_fdones, mb_values, mb_rewards, mb_next_values):
        """GAE(lambda) advantages; ``mb_next_values`` are pre-masked for
        terminal states, so the TD target needs no extra done factor."""
        lastgaelam = 0
        mb_advs = torch.zeros_like(mb_rewards)

        for t in reversed(range(self.horizon_length)):
            not_done = 1.0 - mb_fdones[t]
            not_done = not_done.unsqueeze(1)

            delta = mb_rewards[t] + self.gamma * mb_next_values[t] - mb_values[t]
            lastgaelam = delta + self.gamma * self.tau * not_done * lastgaelam
            mb_advs[t] = lastgaelam

        return mb_advs

    def bound_loss(self, mu):
        """Quadratic penalty on action means beyond the soft bound of +/-1."""
        if self.bounds_loss_coef is not None:
            soft_bound = 1.0
            mu_loss_high = torch.maximum(mu - soft_bound, torch.tensor(0, device=self.ppo_device)) ** 2
            mu_loss_low = torch.minimum(mu + soft_bound, torch.tensor(0, device=self.ppo_device)) ** 2
            b_loss = (mu_loss_low + mu_loss_high).sum(axis=-1)
        else:
            b_loss = 0
        return b_loss

    def _load_config_params(self, config):
        self.last_lr = config['learning_rate']
        return

    def _build_net_config(self):
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        config = {
            'actions_num': self.actions_num,
            'input_shape': obs_shape,
            'num_seqs': self.num_actors * self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        return config

    def _setup_action_space(self):
        action_space = self.env_info['action_space']
        self.actions_num = action_space.shape[0]

        # todo introduce device instead of cuda()
        self.actions_low = torch.from_numpy(action_space.low.copy()).float().to(self.ppo_device)
        self.actions_high = torch.from_numpy(action_space.high.copy()).float().to(self.ppo_device)
        return

    def _init_train(self):
        # hook for subclasses; called once before the training loop starts
        return

    def _env_reset_done(self):
        obs, done_env_ids = self.vec_env.reset_done()
        return self.obs_to_tensors(obs), done_env_ids

    def _eval_critic(self, obs_dict):
        """Evaluate the critic on (normalized) observations, denormalizing
        the value estimate when value normalization is enabled."""
        self.model.eval()
        obs = obs_dict['obs']
        processed_obs = self._preproc_obs(obs)
        if self.normalize_input:
            processed_obs = self.model.norm_obs(processed_obs)
        value = self.model.a2c_network.eval_critic(processed_obs)

        if self.normalize_value:
            value = self.value_mean_std(value, True)
        return value

    def _actor_loss(self, old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip):
        """Clipped-surrogate PPO actor loss (or plain PG when ppo is off)."""
        clip_frac = None
        if (self.ppo):
            ratio = torch.exp(old_action_log_probs_batch - action_log_probs)
            surr1 = advantage * ratio
            surr2 = advantage * torch.clamp(ratio, 1.0 - curr_e_clip,
                                            1.0 + curr_e_clip)
            a_loss = torch.max(-surr1, -surr2)

            clipped = torch.abs(ratio - 1.0) > curr_e_clip
            clip_frac = torch.mean(clipped.float())
            clip_frac = clip_frac.detach()
        else:
            a_loss = (action_log_probs * advantage)

        info = {
            'actor_loss': a_loss,
            'actor_clip_frac': clip_frac
        }
        return info

    def _critic_loss(self, value_preds_batch, values, curr_e_clip, return_batch, clip_value):
        """MSE critic loss, optionally value-clipped around old predictions."""
        if clip_value:
            value_pred_clipped = value_preds_batch + \
                (values - value_preds_batch).clamp(-curr_e_clip, curr_e_clip)
            value_losses = (values - return_batch) ** 2
            value_losses_clipped = (value_pred_clipped - return_batch) ** 2
            c_loss = torch.max(value_losses, value_losses_clipped)
        else:
            c_loss = (return_batch - values) ** 2

        info = {
            'critic_loss': c_loss
        }
        return info

    def _record_train_batch_info(self, batch_dict, train_info):
        # hook for subclasses to stash extra per-batch stats
        return

    def _log_train_info(self, train_info, frame):
        self.writer.add_scalar('performance/update_time', train_info['update_time'], frame)
        self.writer.add_scalar('performance/play_time', train_info['play_time'], frame)
        self.writer.add_scalar('losses/a_loss', torch_ext.mean_list(train_info['actor_loss']).item(), frame)
        self.writer.add_scalar('losses/c_loss', torch_ext.mean_list(train_info['critic_loss']).item(), frame)
        self.writer.add_scalar('losses/bounds_loss', torch_ext.mean_list(train_info['b_loss']).item(), frame)
        self.writer.add_scalar('losses/entropy', torch_ext.mean_list(train_info['entropy']).item(), frame)
        self.writer.add_scalar('info/last_lr', train_info['last_lr'][-1] * train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/lr_mul', train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/e_clip', self.e_clip * train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/clip_frac', torch_ext.mean_list(train_info['actor_clip_frac']).item(), frame)
        self.writer.add_scalar('info/kl', torch_ext.mean_list(train_info['kl']).item(), frame)
        return
================================================
FILE: timechamber/learning/common_player.py
================================================
# License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE]
import time

import torch
from rl_games.algos_torch import players
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common.player import BasePlayer
class CommonPlayer(players.PpoPlayerContinuous):
    """Shared evaluation player.

    Builds its network directly from the train config and runs episodes with
    per-env reset-on-done handling (only finished envs are reset each step).
    """

    def __init__(self, params):
        # Deliberately skip PpoPlayerContinuous.__init__: the network is built
        # below from this class's own config instead of the parent's path.
        BasePlayer.__init__(self, params)
        self.network = self.config['network']
        self.normalize_input = self.config['normalize_input']
        self.normalize_value = self.config['normalize_value']
        self._setup_action_space()
        self.mask = [False]
        net_config = self._build_net_config()
        self._build_net(net_config)
        return

    def run(self):
        """Play up to ``games_num * n_game_life`` games and print statistics."""
        n_games = self.games_num
        render = self.render_env
        n_game_life = self.n_game_life
        is_determenistic = self.is_determenistic
        sum_rewards = 0
        sum_steps = 0
        sum_game_res = 0
        n_games = n_games * n_game_life
        games_played = 0
        has_masks = False
        has_masks_func = getattr(self.env, "has_action_mask", None) is not None
        if has_masks_func:
            has_masks = self.env.has_action_mask()
        need_init_rnn = self.is_rnn
        for _ in range(n_games):
            if games_played >= n_games:
                break
            obs_dict = self.env_reset(self.env)
            batch_size = 1
            batch_size = self.get_batch_size(obs_dict['obs'], batch_size)
            if need_init_rnn:
                self.init_rnn()
                need_init_rnn = False
            cr = torch.zeros(batch_size, dtype=torch.float32)
            steps = torch.zeros(batch_size, dtype=torch.float32)
            print_game_res = False
            for n in range(self.max_steps):
                # Reset only the envs that finished during the previous step.
                obs_dict, done_env_ids = self._env_reset_done()
                if has_masks:
                    masks = self.env.get_action_mask()
                    action = self.get_masked_action(obs_dict, masks, is_determenistic)
                else:
                    action = self.get_action(obs_dict, is_determenistic)
                obs_dict, r, done, info = self.env_step(self.env, action)
                cr += r
                steps += 1
                self._post_step(info)
                if render:
                    self.env.render(mode='human')
                    time.sleep(self.render_sleep)
                all_done_indices = done.nonzero(as_tuple=False)
                # One entry per env: each env contributes num_agents done flags.
                done_indices = all_done_indices[::self.num_agents]
                done_count = len(done_indices)
                games_played += done_count
                if done_count > 0:
                    if self.is_rnn:
                        for s in self.states:
                            s[:, all_done_indices, :] = s[:, all_done_indices, :] * 0.0
                    cur_rewards = cr[done_indices].sum().item()
                    cur_steps = steps[done_indices].sum().item()
                    cr = cr * (1.0 - done.float())
                    steps = steps * (1.0 - done.float())
                    sum_rewards += cur_rewards
                    sum_steps += cur_steps
                    game_res = 0.0
                    if isinstance(info, dict):
                        if 'battle_won' in info:
                            print_game_res = True
                            game_res = info.get('battle_won', 0.5)
                        if 'scores' in info:
                            print_game_res = True
                            game_res = info.get('scores', 0.5)
                    if self.print_stats:
                        if print_game_res:
                            print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count, 'w:', game_res)
                        else:
                            print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count)
                    sum_game_res += game_res
                    if batch_size // self.num_agents == 1 or games_played >= n_games:
                        break
        print(sum_rewards)
        # Guard against division by zero when no game finished within max_steps.
        games_played = max(games_played, 1)
        if print_game_res:
            print('av reward:', sum_rewards / games_played * n_game_life,
                  'av steps:', sum_steps / games_played * n_game_life,
                  'winrate:', sum_game_res / games_played * n_game_life)
        else:
            print('av reward:', sum_rewards / games_played * n_game_life,
                  'av steps:', sum_steps / games_played * n_game_life)
        return

    def obs_to_torch(self, obs):
        """Wrap the base-class tensor conversion in an ``{'obs': ...}`` dict."""
        obs = super().obs_to_torch(obs)
        obs_dict = {
            'obs': obs
        }
        return obs_dict

    def get_action(self, obs_dict, is_determenistic=False):
        """Unpack the obs dict and defer to the parent implementation."""
        output = super().get_action(obs_dict['obs'], is_determenistic)
        return output

    def _build_net(self, config):
        """Instantiate the policy network in eval mode on the player device."""
        self.model = self.network.build(config)
        self.model.to(self.device)
        self.model.eval()
        self.is_rnn = self.model.is_rnn()
        return

    def _env_reset_done(self):
        """Reset finished envs only; return (obs dict, ids of reset envs)."""
        obs, done_env_ids = self.env.reset_done()
        return self.obs_to_torch(obs), done_env_ids

    def _post_step(self, info):
        """Hook for subclasses to react to env info after each step."""
        return

    def _build_net_config(self):
        """Assemble the kwargs consumed by ``network.build()``."""
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        config = {
            'actions_num': self.actions_num,
            'input_shape': obs_shape,
            'num_seqs': self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        return config

    def _setup_action_space(self):
        """Cache action count and bounds as tensors on the player device."""
        self.actions_num = self.action_space.shape[0]
        self.actions_low = torch.from_numpy(self.action_space.low.copy()).float().to(self.device)
        self.actions_high = torch.from_numpy(self.action_space.high.copy()).float().to(self.device)
        return
================================================
FILE: timechamber/learning/hrl_sp_agent.py
================================================
import copy
from collections import OrderedDict
from datetime import datetime
from gym import spaces
import numpy as np
import os
import time
from .pfsp_player_pool import PFSPPlayerPool, SinglePlayer, PFSPPlayerThreadPool, PFSPPlayerProcessPool, \
PFSPPlayerVectorizedPool
from rl_games.common.a2c_common import swap_and_flatten01
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import central_value
from isaacgym.torch_utils import *
import torch
from torch import optim
from tensorboardX import SummaryWriter
import torch.distributed as dist
import timechamber.ase.hrl_agent as hrl_agent
from timechamber.utils.utils import load_check, load_checkpoint
class HRLSPAgent(hrl_agent.HRLAgent):
    """Hierarchical-RL self-play agent.

    Extends the ASE HRL agent with a PFSP-style opponent pool: snapshots of
    the policy are periodically frozen into the pool and sampled as opponents
    while the live policy keeps training against them.
    """

    def __init__(self, base_name, params):
        # The base agent reads the device out of the train config.
        params['config']['device'] = params['device']
        super().__init__(base_name, params)
        self.player_pool_type = params['player_pool_type']
        # Build config reused for every frozen opponent copy.
        self.base_model_config = {
            'actions_num': self.actions_num,
            'input_shape': self.obs_shape,
            'num_seqs': self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        self.max_his_player_num = params['player_pool_length']
        if params['op_load_path']:
            # Seed self-play with a pretrained opponent checkpoint.
            self.init_op_model = self.create_model()
            self.restore_op(params['op_load_path'])
        else:
            # Otherwise the first opponent is (a copy of) the current policy.
            self.init_op_model = self.model
        self.players_dir = os.path.join(self.experiment_dir, 'policy_dir')
        os.makedirs(self.players_dir, exist_ok=True)
        self.update_win_rate = params['update_win_rate']
        self.num_opponent_agents = params['num_agents'] - 1
        self.player_pool = self._build_player_pool(params)
        self.games_to_check = params['games_to_check']
        self.now_update_steps = 0
        self.max_update_steps = params['max_update_steps']
        self.update_op_num = 0
        self.update_player_pool(self.init_op_model, player_idx=self.update_op_num)
        self.resample_op(torch.arange(end=self.num_actors, device=self.device, dtype=torch.long))
        assert self.num_actors % self.max_his_player_num == 0

    def _build_player_pool(self, params):
        """Create the opponent pool (fused vectorized pool or plain list pool)."""
        if self.player_pool_type == 'vectorized':
            # NOTE(review): this aliases (does not copy) base_model_config, so the
            # two extra keys below leak into later create_model() calls — confirm
            # the network builder ignores them.
            vector_model_config = self.base_model_config
            vector_model_config['num_envs'] = self.num_actors * self.num_opponent_agents
            vector_model_config['population_size'] = self.max_his_player_num
            return PFSPPlayerVectorizedPool(max_length=self.max_his_player_num, device=self.device,
                                            vector_model_config=vector_model_config, params=params)
        else:
            return PFSPPlayerPool(max_length=self.max_his_player_num, device=self.device)

    def play_steps(self):
        """Collect one horizon of experience; opponents act via the frozen pool.

        Returns the batch dict (obses, actions, rewards, values, returns, ...)
        consumed by the PPO update.
        """
        self.set_eval()
        env_done_indices = torch.tensor([], device=self.device, dtype=torch.long)
        update_list = self.update_list
        step_time = 0.0
        for n in range(self.horizon_length):
            # Reset only the envs that finished during the previous step.
            self.obs = self.env_reset(env_done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])
            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                # Opponent actions come from the pool, ego actions from the live model.
                res_dict_op = self.get_action_values(self.obs, is_op=True)
                res_dict = self.get_action_values(self.obs)
            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])
            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])
            if self.player_pool_type == 'multi_thread':
                # NOTE(review): shutting the thread pool down every step looks
                # suspicious — confirm the pool re-creates it internally.
                self.player_pool.thread_pool.shutdown()
            step_time_start = time.time()
            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'],
                                                                 res_dict_op['actions'])
            step_time_end = time.time()
            step_time += (step_time_end - step_time_start)
            shaped_rewards = self.rewards_shaper(rewards)
            if self.value_bootstrap and 'time_outs' in infos:
                # Bootstrap the value on timeout-truncated episodes.
                shaped_rewards += self.gamma * res_dict['values'] * self.cast_obs(infos['time_outs']).unsqueeze(
                    1).float()
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)
            self.experience_buffer.update_data('disc_rewards', n, infos['disc_rewards'])
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            # Zero next-state values on true terminations (no bootstrap there).
            next_vals = self._eval_critic(self.obs)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)
            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            # An env counts as done only when all of its agents are done.
            env_done_indices = self.dones.view(self.num_actors, self.num_agents).all(dim=1).nonzero(as_tuple=False)
            self.game_rewards.update(self.current_rewards[env_done_indices])
            self.game_lengths.update(self.current_lengths[env_done_indices])
            self.algo_observer.process_infos(infos, env_done_indices)
            not_dones = 1.0 - self.dones.float()
            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones
            self.player_pool.update_player_metric(infos=infos)
            # Re-draw opponents for envs that just finished an episode.
            self.resample_op(all_done_indices.flatten())
            env_done_indices = env_done_indices[:, 0]
        # NOTE(review): last_values appears unused below — confirm intentional.
        last_values = self.get_values(self.obs)
        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        mb_disc_rewards = self.experience_buffer.tensor_dict['disc_rewards']
        # Blend task and discriminator (style) rewards before computing advantages.
        mb_rewards = self._combine_rewards(mb_rewards, mb_disc_rewards)
        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values
        batch_dict = self.experience_buffer.get_transformed_list(swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size
        batch_dict['step_time'] = step_time
        return batch_dict

    def env_step(self, ego_actions, op_actions):
        """Advance the sim ``_llc_steps`` low-level steps per high-level action.

        Rewards are averaged over the llc sub-steps; done/terminate/win/lose/
        draw flags are OR-ed (any sub-step event marks the whole macro step).
        """
        ego_actions = self.preprocess_actions(ego_actions)
        op_actions = self.preprocess_actions(op_actions)
        obs = self.obs['obs']
        obs_op = self.obs['obs_op']
        rewards = 0.0
        disc_rewards = 0.0
        done_count = 0.0
        terminate_count = 0.0
        win_count = 0.0
        lose_count = 0.0
        draw_count = 0.0
        for t in range(self._llc_steps):
            # Decode high-level latents into low-level motor actions.
            llc_ego_actions = self._compute_llc_action(obs, ego_actions)
            llc_op_actions = self._compute_llc_action(obs_op, op_actions)
            llc_actions = torch.cat((llc_ego_actions, llc_op_actions), dim=0)
            obs_dict, curr_rewards, curr_dones, infos = self.vec_env.step(llc_actions)
            rewards += curr_rewards
            done_count += curr_dones
            terminate_count += infos['terminate']
            win_count += infos['win']
            lose_count += infos['lose']
            draw_count += infos['draw']
            amp_obs = infos['amp_obs']
            curr_disc_reward = self._calc_disc_reward(amp_obs)
            disc_rewards += curr_disc_reward
            # Stacked layout: first num_actors rows are ego obs, the rest opponents.
            obs = obs_dict['obs'][:self.num_actors]
            obs_op = obs_dict['obs'][self.num_actors:]
        rewards /= self._llc_steps
        disc_rewards /= self._llc_steps
        dones = torch.zeros_like(done_count)
        dones[done_count > 0] = 1.0
        terminate = torch.zeros_like(terminate_count)
        terminate[terminate_count > 0] = 1.0
        infos['terminate'] = terminate
        infos['disc_rewards'] = disc_rewards
        wins = torch.zeros_like(win_count)
        wins[win_count > 0] = 1.0
        infos['win'] = wins
        loses = torch.zeros_like(lose_count)
        loses[lose_count > 0] = 1.0
        infos['lose'] = loses
        draws = torch.zeros_like(draw_count)
        draws[draw_count > 0] = 1.0
        infos['draw'] = draws
        obs_dict = {}
        obs_dict['obs'] = obs
        obs_dict['obs_op'] = obs_op
        if self.is_tensor_obses:
            if self.value_size == 1:
                rewards = rewards.unsqueeze(1)
            return self.obs_to_tensors(obs_dict), rewards.to(self.ppo_device), dones.to(self.ppo_device), infos
        else:
            if self.value_size == 1:
                rewards = np.expand_dims(rewards, axis=1)
            return self.obs_to_tensors(obs_dict), torch.from_numpy(rewards).to(self.ppo_device).float(), torch.from_numpy(dones).to(self.ppo_device), infos

    def env_reset(self, env_ids=None):
        """Reset (a subset of) envs and split obs into ego / opponent views."""
        obs = self.vec_env.reset(env_ids)
        obs = self.obs_to_tensors(obs)
        # Stacked layout: [ego obs | opponent obs].
        obs['obs_op'] = obs['obs'][self.num_actors:]
        obs['obs'] = obs['obs'][:self.num_actors]
        return obs

    def train(self):
        """Main self-play training loop (adapted from the rl_games a2c loop).

        Returns (last mean rewards, epoch number) when a stop condition fires.
        """
        self.init_tensors()
        self.mean_rewards = self.last_mean_rewards = -100500
        start_time = time.time()
        total_time = 0
        rep_count = 0
        # self.frame = 0 # loading from checkpoint
        self.obs = self.env_reset()
        if self.multi_gpu:
            # Rank 0 broadcasts its weights so all workers start identical.
            torch.cuda.set_device(self.rank)
            print("====================broadcasting parameters")
            model_params = [self.model.state_dict()]
            dist.broadcast_object_list(model_params, 0)
            self.model.load_state_dict(model_params[0])
        self._init_train()
        while True:
            epoch_num = self.update_epoch()
            train_info = self.train_epoch()
            print(f"epoch num: {epoch_num}")
            sum_time = train_info['total_time']
            step_time = train_info['step_time']
            play_time = train_info['play_time']
            update_time = train_info['update_time']
            a_losses = train_info['actor_loss']
            c_losses = train_info['critic_loss']
            entropies = train_info['entropy']
            kls = train_info['kl']
            last_lr = train_info['last_lr'][-1]
            lr_mul = train_info['lr_mul'][-1]
            # cleaning memory to optimize space
            self.dataset.update_values_dict(None)
            total_time += sum_time
            curr_frames = self.curr_frames * self.rank_size if self.multi_gpu else self.curr_frames
            self.frame += curr_frames
            should_exit = False
            # Only rank 0 logs, saves, and decides whether to stop.
            if self.rank == 0:
                self.diagnostics.epoch(self, current_epoch=epoch_num)
                scaled_time = self.num_agents * sum_time
                scaled_play_time = self.num_agents * play_time
                frame = self.frame // self.num_agents
                if self.print_stats:
                    step_time = max(step_time, 1e-6)
                    fps_step = curr_frames / step_time
                    fps_step_inference = curr_frames / scaled_play_time
                    fps_total = curr_frames / scaled_time
                    print(
                        f'fps step: {fps_step:.0f} fps step and policy inference: {fps_step_inference:.0f} fps total: {fps_total:.0f} epoch: {epoch_num}/{self.max_epochs}')
                self.write_stats(total_time, epoch_num, step_time, play_time, update_time, a_losses, c_losses,
                                 entropies, kls, last_lr, lr_mul, frame, scaled_time, scaled_play_time, curr_frames)
                self.algo_observer.after_print_stats(frame, epoch_num, total_time)
                if self.game_rewards.current_size > 0:
                    mean_rewards = self.game_rewards.get_mean()
                    mean_lengths = self.game_lengths.get_mean()
                    self.mean_rewards = mean_rewards[0]
                    for i in range(self.value_size):
                        rewards_name = 'rewards' if i == 0 else 'rewards{0}'.format(i)
                        self.writer.add_scalar(rewards_name + '/step'.format(i), mean_rewards[i], frame)
                        self.writer.add_scalar(rewards_name + '/iter'.format(i), mean_rewards[i], epoch_num)
                        self.writer.add_scalar(rewards_name + '/time'.format(i), mean_rewards[i], total_time)
                    self.writer.add_scalar('episode_lengths/step', mean_lengths, frame)
                    self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num)
                    self.writer.add_scalar('episode_lengths/time', mean_lengths, total_time)
                    # removed equal signs (i.e. "rew=") from the checkpoint name since it messes with hydra CLI parsing
                    checkpoint_name = self.config['name'] + '_ep_' + str(epoch_num) + '_rew_' + str(mean_rewards[0])
                    if self.save_freq > 0:
                        # NOTE(review): mean_rewards is an array here; the <=
                        # comparison only works for value_size == 1 — confirm.
                        if (epoch_num % self.save_freq == 0) and (mean_rewards <= self.last_mean_rewards):
                            self.save(os.path.join(self.nn_dir, 'last_' + checkpoint_name))
                    if mean_rewards[0] > self.last_mean_rewards and epoch_num >= self.save_best_after:
                        print('saving next best rewards: ', mean_rewards)
                        self.last_mean_rewards = mean_rewards[0]
                        self.save(os.path.join(self.nn_dir, self.config['name']))
                        if 'score_to_win' in self.config:
                            if self.last_mean_rewards > self.config['score_to_win']:
                                print('Network won!')
                                self.save(os.path.join(self.nn_dir, checkpoint_name))
                                should_exit = True
                if epoch_num >= self.max_epochs:
                    if self.game_rewards.current_size == 0:
                        print('WARNING: Max epochs reached before any env terminated at least once')
                        mean_rewards = -np.inf
                    self.save(os.path.join(self.nn_dir,
                                           'last_' + self.config['name'] + 'ep' + str(epoch_num) + 'rew' + str(
                                               mean_rewards)))
                    print('MAX EPOCHS NUM!')
                    should_exit = True
                self.update_metric()
                update_time = 0
            if self.multi_gpu:
                # Propagate rank 0's stop decision to all workers.
                should_exit_t = torch.tensor(should_exit, device=self.device).float()
                dist.broadcast(should_exit_t, 0)
                should_exit = should_exit_t.bool().item()
            if should_exit:
                return self.last_mean_rewards, epoch_num

    def update_metric(self):
        """Aggregate per-opponent win rates and maybe snapshot a new opponent."""
        tot_win_rate = 0
        tot_games_num = 0
        self.now_update_steps += 1
        # self_player process
        for player in self.player_pool.players:
            win_rate = player.win_rate()
            games = player.games_num()
            self.writer.add_scalar(f'rate/win_rate_player_{player.player_idx}', win_rate, self.epoch_num)
            # Games-weighted average across the pool.
            tot_win_rate += win_rate * games
            tot_games_num += games
        # NOTE(review): raises ZeroDivisionError if no games have finished yet —
        # confirm players always report at least one game before this is called.
        win_rate = tot_win_rate / tot_games_num
        if tot_games_num > self.games_to_check:
            self.check_update_opponent(win_rate)
        self.writer.add_scalar('rate/win_rate', win_rate, self.epoch_num)

    def get_action_values(self, obs, is_op=False):
        """Sample actions from the live model (ego) or the opponent pool (is_op)."""
        processed_obs = self._preproc_obs(obs['obs_op'] if is_op else obs['obs'])
        if not is_op:
            self.model.eval()
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs': processed_obs,
            'rnn_states': self.rnn_states
        }
        with torch.no_grad():
            if is_op:
                # Buffers filled in-place by each pooled player on its env slice.
                res_dict = {
                    "actions": torch.zeros((self.num_actors * self.num_opponent_agents, self.actions_num),
                                           device=self.device),
                    "values": torch.zeros((self.num_actors * self.num_opponent_agents, 1), device=self.device)
                }
                self.player_pool.inference(input_dict, res_dict, processed_obs)
            else:
                res_dict = self.model(input_dict)
                if self.has_central_value:
                    states = obs['states']
                    input_dict = {
                        'is_train': False,
                        'states': states,
                    }
                    value = self.get_central_value(input_dict)
                    res_dict['values'] = value
        return res_dict

    def restore(self, fn):
        """Load a full training checkpoint for the live agent."""
        checkpoint = load_checkpoint(fn, device=self.device)
        checkpoint = load_check(checkpoint=checkpoint,
                                normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        self.set_full_state_weights(checkpoint)

    def resample_op(self, resample_indices):
        """Re-draw a pooled opponent for every env slot in ``resample_indices``."""
        # Drop the finished env slots from whichever players currently own them.
        for op_idx in range(self.num_opponent_agents):
            for player in self.player_pool.players:
                player.remove_envs(resample_indices + op_idx * self.num_actors)
        # PFSP-sample a new owner per env slot.
        for op_idx in range(self.num_opponent_agents):
            for env_idx in resample_indices:
                player = self.player_pool.sample_player()
                player.add_envs(env_idx + op_idx * self.num_actors)
        for player in self.player_pool.players:
            player.reset_envs()

    def resample_batch(self):
        """Reassign all opponent env slots to pool players in contiguous chunks."""
        env_indices = torch.arange(end=self.num_actors * self.num_opponent_agents,
                                   device=self.device, dtype=torch.long,
                                   requires_grad=False)
        # One sampled player per chunk of num_actors // 32 envs.
        step = self.num_actors // 32
        for player in self.player_pool.players:
            player.clear_envs()
        for i in range(0, self.num_actors, step):
            player = self.player_pool.sample_player()
            player.add_envs(env_indices[i:i + step])
        print("resample done")

    def restore_op(self, fn):
        """Load opponent weights (and obs normalization) into init_op_model."""
        checkpoint = load_checkpoint(fn, device=self.device)
        checkpoint = load_check(checkpoint, normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        self.init_op_model.load_state_dict(checkpoint['model'])
        if self.normalize_input and 'running_mean_std' in checkpoint:
            self.init_op_model.running_mean_std.load_state_dict(checkpoint['running_mean_std'])

    def check_update_opponent(self, win_rate):
        """Freeze the current policy into the pool once it dominates (or stalls)."""
        if win_rate > self.update_win_rate or self.now_update_steps > self.max_update_steps:
            print(f'winrate:{win_rate},add opponent to player pool')
            self.update_op_num += 1
            self.now_update_steps = 0
            self.update_player_pool(self.model, player_idx=self.update_op_num)
            self.player_pool.clear_player_metric()
            # All envs get fresh opponents drawn from the enlarged pool.
            self.resample_op(torch.arange(end=self.num_actors, device=self.device, dtype=torch.long))
            self.save(os.path.join(self.players_dir, f'policy_{self.update_op_num}'))

    def create_model(self):
        """Build a fresh (untrained) network on the agent device."""
        model = self.network.build(self.base_model_config)
        model.to(self.device)
        return model

    def update_player_pool(self, model, player_idx):
        """Deep-copy ``model`` into a frozen SinglePlayer and add it to the pool."""
        new_model = self.create_model()
        new_model.load_state_dict(copy.deepcopy(model.state_dict()))
        if hasattr(model, 'running_mean_std'):
            new_model.running_mean_std.load_state_dict(copy.deepcopy(model.running_mean_std.state_dict()))
        player = SinglePlayer(player_idx, new_model, self.device, self.num_actors * self.num_opponent_agents)
        self.player_pool.add_player(player)
================================================
FILE: timechamber/learning/hrl_sp_player.py
================================================
# License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE]
import os
import time
import torch
import numpy as np
from rl_games.algos_torch import players
import random
from rl_games.algos_torch import torch_ext
from rl_games.common.tr_helpers import unsqueeze_obs
from timechamber.ase import hrl_players
from timechamber.utils.utils import load_check, load_checkpoint
from .pfsp_player_pool import PFSPPlayerPool, PFSPPlayerVectorizedPool, PFSPPlayerThreadPool, PFSPPlayerProcessPool, \
SinglePlayer
import matplotlib.pyplot as plt
from multielo import MultiElo
class HRLSPPlayer(hrl_players.HRLPlayer):
    def __init__(self, params):
        """Build an evaluation player that pits policy checkpoints against each other.

        ``params`` carries both the rl_games train config and self-play extras
        (player_pool_type, num_agents, op_load_path, ...).
        """
        params['config']['device_name'] = params['device']
        super().__init__(params)
        print(f'params:{params}')
        self.network = self.config['network']
        self.mask = [False]
        self.is_rnn = False
        self.normalize_input = self.config['normalize_input']
        self.normalize_value = self.config.get('normalize_value', False)
        # Config used to build every loaded checkpoint's network.
        self.base_model_config = {
            'actions_num': self.actions_num,
            'input_shape': self.obs_shape,
            'num_seqs': self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        # Checkpoint modification times; later normalized to days for the ELO plot.
        self.policy_timestep = []
        self.policy_op_timestep = []
        self.params = params
        self.record_elo = self.player_config.get('record_elo', False)
        self.init_elo = self.player_config.get('init_elo', 400)
        self.num_actors = params['config']['num_actors']
        self.player_pool_type = params['player_pool_type']
        self.player_pool = None
        self.op_player_pool = None
        self.num_opponents = params['num_agents'] - 1
        self.max_steps = 1000
        self.update_op_num = 0
        # players_per_env[i] == [ego player, opponent players...] for env i.
        self.players_per_env = []
        self.elo = MultiElo()
    def restore(self, load_dir):
        """Load ego policies from a checkpoint file or a directory of checkpoints.

        Players are ordered by file modification time and re-indexed 0..N-1;
        afterwards opponents are loaded and env slots are allocated.
        """
        if os.path.isdir(load_dir):
            self.player_pool = self._build_player_pool(params=self.params, player_num=len(os.listdir(load_dir)))
            print('dir:', load_dir)
            sorted_players = []
            for idx, policy_check_checkpoint in enumerate(os.listdir(load_dir)):
                # Use mtime as a provisional sort key / player id.
                model_timestep = os.path.getmtime(load_dir + '/' + str(policy_check_checkpoint))
                self.policy_timestep.append(model_timestep)
                model = self.load_model(load_dir + '/' + str(policy_check_checkpoint))
                new_player = SinglePlayer(player_idx=model_timestep, model=model, device=self.device,
                                          rating=self.init_elo, obs_batch_len=self.num_actors * self.num_opponents)
                sorted_players.append(new_player)
            sorted_players.sort(key=lambda player: player.player_idx)
            # Replace mtime ids with dense indices in chronological order.
            for idx, player in enumerate(sorted_players):
                player.player_idx = idx
                self.player_pool.add_player(player)
            self.policy_timestep.sort()
        else:
            self.player_pool = self._build_player_pool(params=self.params, player_num=1)
            self.policy_timestep.append(os.path.getmtime(load_dir))
            model = self.load_model(load_dir)
            new_player = SinglePlayer(player_idx=0, model=model, device=self.device,
                                      rating=self.init_elo, obs_batch_len=self.num_actors * self.num_opponents)
            self.player_pool.add_player(new_player)
        self.restore_op(self.params['op_load_path'])
        self._norm_policy_timestep()
        self._alloc_env_indices()
    def restore_op(self, load_dir):
        """Load opponent policies, mirroring ``restore`` for the opponent pool."""
        if os.path.isdir(load_dir):
            self.op_player_pool = self._build_player_pool(params=self.params, player_num=len(os.listdir(load_dir)))
            sorted_players = []
            for idx, policy_check_checkpoint in enumerate(os.listdir(load_dir)):
                model_timestep = os.path.getmtime(load_dir + '/' + str(policy_check_checkpoint))
                self.policy_op_timestep.append(model_timestep)
                model = self.load_model(load_dir + '/' + str(policy_check_checkpoint))
                new_player = SinglePlayer(player_idx=model_timestep, model=model, device=self.device,
                                          rating=self.init_elo, obs_batch_len=self.num_actors * self.num_opponents)
                sorted_players.append(new_player)
            sorted_players.sort(key=lambda player: player.player_idx)
            # Replace mtime ids with dense indices in chronological order.
            for idx, player in enumerate(sorted_players):
                player.player_idx = idx
                self.op_player_pool.add_player(player)
            self.policy_op_timestep.sort()
        else:
            self.op_player_pool = self._build_player_pool(params=self.params, player_num=1)
            self.policy_op_timestep.append(os.path.getmtime(load_dir))
            model = self.load_model(load_dir)
            # NOTE(review): rating is hard-coded to 400 here while restore()
            # uses self.init_elo — confirm this asymmetry is intended.
            new_player = SinglePlayer(player_idx=0, model=model, device=self.device,
                                      rating=400, obs_batch_len=self.num_actors * self.num_opponents)
            self.op_player_pool.add_player(new_player)
def _alloc_env_indices(self):
for idx in range(self.num_actors):
player_idx = random.randint(0, len(self.player_pool.players) - 1)
self.player_pool.players[player_idx].add_envs(torch.tensor([idx], dtype=torch.long, device=self.device))
env_player = [self.player_pool.players[player_idx]]
for op_idx in range(self.num_opponents):
op_player_idx = random.randint(0, len(self.op_player_pool.players) - 1)
self.op_player_pool.players[op_player_idx].add_envs(
torch.tensor([idx + op_idx * self.num_actors], dtype=torch.long, device=self.device))
env_player.append(self.op_player_pool.players[op_player_idx])
self.players_per_env.append(env_player)
for player in self.player_pool.players:
player.reset_envs()
for player in self.op_player_pool.players:
player.reset_envs()
def _build_player_pool(self, params, player_num):
if self.player_pool_type == 'multi_thread':
return PFSPPlayerProcessPool(max_length=player_num,
device=self.device)
elif self.player_pool_type == 'multi_process':
return PFSPPlayerThreadPool(max_length=player_num,
device=self.device)
elif self.player_pool_type == 'vectorized':
vector_model_config = self.base_model_config
vector_model_config['num_envs'] = self.num_actors * self.num_opponents
vector_model_config['population_size'] = player_num
return PFSPPlayerVectorizedPool(max_length=player_num, device=self.device,
vector_model_config=vector_model_config, params=params)
else:
return PFSPPlayerPool(max_length=player_num, device=self.device)
    def _update_rating(self, info, env_indices):
        """Update ELO ratings for every env that just finished a game.

        Two-player games use the win/lose/draw flags; games with several
        opponents use the per-env 'ranks' tensor instead.
        """
        for env_idx in env_indices:
            if self.num_opponents == 1:
                player = self.players_per_env[env_idx][0]
                op_player = self.players_per_env[env_idx][1]
                if info['win'][env_idx]:
                    # Winner is listed first; ratings come back in input order.
                    player.rating, op_player.rating = self.elo.get_new_ratings([player.rating, op_player.rating])
                elif info['lose'][env_idx]:
                    op_player.rating, player.rating = self.elo.get_new_ratings([op_player.rating, player.rating])
                elif info['draw'][env_idx]:
                    # Equal result_order entries mark a tie.
                    player.rating, op_player.rating = self.elo.get_new_ratings([player.rating, op_player.rating],
                                                                               result_order=[1, 1])
            else:
                ranks = info['ranks'][env_idx].cpu().numpy()
                # Sort the env's players by finishing rank before scoring.
                players_sorted_by_rank = sorted(enumerate(self.players_per_env[env_idx]), key=lambda x: ranks[x[0]])
                sorted_ranks = sorted(ranks)
                now_ratings = [player.rating for idx, player in players_sorted_by_rank]
                new_ratings = self.elo.get_new_ratings(now_ratings, result_order=sorted_ranks)
                for idx, new_rating in enumerate(new_ratings):
                    players_sorted_by_rank[idx][1].rating = new_rating
    def run(self):
        """Evaluate pooled policies against the opponent pool.

        Plays up to games_num games, prints per-game reward/length stats, and
        (optionally) tracks ELO ratings and plots them at the end.
        """
        n_games = self.games_num
        render = self.render_env
        n_game_life = self.n_game_life
        is_determenistic = self.is_determenistic
        sum_rewards = 0
        sum_steps = 0
        sum_game_res = 0
        n_games = n_games * n_game_life
        games_played = 0
        has_masks = False
        has_masks_func = getattr(self.env, "has_action_mask", None) is not None
        if has_masks_func:
            has_masks = self.env.has_action_mask()
        print(f'games_num:{n_games}')
        need_init_rnn = self.is_rnn
        for _ in range(n_games):
            if games_played >= n_games:
                break
            obses = self.env_reset(self.env)
            batch_size = 1
            batch_size = self.get_batch_size(obses['obs'], batch_size)
            if need_init_rnn:
                self.init_rnn()
                need_init_rnn = False
            cr = torch.zeros(batch_size, dtype=torch.float32, device=self.device)
            steps = torch.zeros(batch_size, dtype=torch.float32, device=self.device)
            print_game_res = False
            done_indices = torch.tensor([], device=self.device, dtype=torch.long)
            for n in range(self.max_steps):
                # Reset only envs that finished during the previous step.
                obses = self.env_reset(self.env, done_indices)
                if has_masks:
                    # NOTE(review): this branch never sets action_op, which
                    # env_step below requires — confirm masks are unused here.
                    masks = self.env.get_action_mask()
                    action = self.get_masked_action(
                        obses, masks, is_determenistic)
                else:
                    # Ego and opponent actions come from their separate pools.
                    action = self.get_action(obses['obs'], is_determenistic)
                    action_op = self.get_action(obses['obs_op'], is_determenistic, is_op=True)
                obses, r, done, info = self.env_step(self.env, obses, action, action_op)
                cr += r
                steps += 1
                if render:
                    self.env.render(mode='human')
                    time.sleep(self.render_sleep)
                all_done_indices = done.nonzero(as_tuple=False)
                # One entry per env: each env contributes num_agents done flags.
                done_indices = all_done_indices[::self.num_agents]
                done_count = len(done_indices)
                games_played += done_count
                if self.record_elo:
                    self._update_rating(info, all_done_indices.flatten())
                if done_count > 0:
                    if self.is_rnn:
                        for s in self.states:
                            s[:, all_done_indices, :] = s[:, all_done_indices, :] * 0.0
                    cur_rewards = cr[done_indices].sum().item()
                    cur_steps = steps[done_indices].sum().item()
                    cr = cr * (1.0 - done.float())
                    steps = steps * (1.0 - done.float())
                    sum_rewards += cur_rewards
                    sum_steps += cur_steps
                    game_res = 0.0
                    if isinstance(info, dict):
                        if 'battle_won' in info:
                            print_game_res = True
                            game_res = info.get('battle_won', 0.5)
                        if 'scores' in info:
                            print_game_res = True
                            game_res = info.get('scores', 0.5)
                    if self.print_stats:
                        if print_game_res:
                            print('reward:', cur_rewards / done_count,
                                  'steps:', cur_steps / done_count, 'w:', game_res)
                        else:
                            print('reward:', cur_rewards / done_count,
                                  'steps:', cur_steps / done_count)
                    sum_game_res += game_res
                    if batch_size // self.num_agents == 1 or games_played >= n_games:
                        break
                # Keep only the env column for the next partial reset.
                done_indices = done_indices[:, 0]
        if self.record_elo:
            self._plot_elo_curve()
def _plot_elo_curve(self):
x = np.array(self.policy_timestep)
y = np.arange(len(self.player_pool.players))
x_op = np.array(self.policy_op_timestep)
y_op = np.arange(len(self.op_player_pool.players))
for player in self.player_pool.players:
idx = player.player_idx
y[idx] = player.rating
for player in self.op_player_pool.players:
idx = player.player_idx
y_op[idx] = player.rating
if self.params['load_path'] != self.params['op_load_path']:
l1 = plt.plot(x, y, 'b--', label='policy')
l2 = plt.plot(x_op, y_op, 'r--', label='policy_op')
plt.plot(x, y, 'b^-', x_op, y_op, 'ro-')
else:
l1 = plt.plot(x, y, 'b--', label='policy')
plt.plot(x, y, 'b^-')
plt.title('ELO Curve')
plt.xlabel('timestep/days')
plt.ylabel('ElO')
plt.legend()
plt.savefig(self.params['load_path'] + '/../elo.jpg')
def get_action(self, obs, is_determenistic=False, is_op=False):
if self.has_batch_dimension == False:
obs = unsqueeze_obs(obs)
obs = self._preproc_obs(obs)
input_dict = {
'is_train': False,
'prev_actions': None,
'obs': obs,
'rnn_states': self.states
}
with torch.no_grad():
data_len = self.num_actors * self.num_opponents if is_op else self.num_actors
res_dict = {
"actions": torch.zeros((data_len, self.actions_num), device=self.device),
"values": torch.zeros((data_len, 1), device=self.device),
"mus": torch.zeros((data_len, self.actions_num), device=self.device)
}
if is_op:
self.op_player_pool.inference(input_dict, res_dict, obs)
else:
self.player_pool.inference(input_dict, res_dict, obs)
mu = res_dict['mus']
action = res_dict['actions']
if is_determenistic:
current_action = mu
else:
current_action = action
current_action = torch.squeeze(current_action.detach())
return torch.clamp(current_action, -1.0, 1.0)
def _norm_policy_timestep(self):
self.policy_op_timestep.sort()
self.policy_timestep.sort()
for idx in range(1, len(self.policy_op_timestep)):
self.policy_op_timestep[idx] -= self.policy_op_timestep[0]
self.policy_op_timestep[idx] /= 3600 * 24
for idx in range(1, len(self.policy_timestep)):
self.policy_timestep[idx] -= self.policy_timestep[0]
self.policy_timestep[idx] /= 3600 * 24
self.policy_timestep[0] = 0
if len(self.policy_op_timestep):
self.policy_op_timestep[0] = 0
def env_reset(self, env, env_ids=None):
obs = env.reset(env_ids)
obs_dict = {}
obs_dict['obs_op'] = obs[self.num_actors:]
obs_dict['obs'] = obs[:self.num_actors]
return obs_dict
    def env_step(self, env, obs_dict, ego_actions, op_actions):
        """Advance the env by one high-level step (= self._llc_steps low-level steps).

        For each sub-step, both high-level actions are translated by the
        low-level controller and the env is stepped with the stacked result.
        Rewards are averaged over the sub-steps; done/terminate/win/lose/draw
        flags are collapsed to 0/1 per env ("fired in any sub-step").
        Returns (next_obs_dict, rewards, dones, infos).
        """
        obs = obs_dict['obs']
        obs_op = obs_dict['obs_op']
        # Accumulators across the low-level sub-steps.
        rewards = 0.0
        done_count = 0.0
        disc_rewards = 0.0
        terminate_count = 0.0
        win_count = 0.0
        lose_count = 0.0
        draw_count = 0.0
        for t in range(self._llc_steps):
            # Low-level actions for ego and opponents, stacked ego-first.
            llc_ego_actions = self._compute_llc_action(obs, ego_actions)
            llc_op_actions = self._compute_llc_action(obs_op, op_actions)
            llc_actions = torch.cat((llc_ego_actions, llc_op_actions), dim=0)
            obs_all, curr_rewards, curr_dones, infos = env.step(llc_actions)
            rewards += curr_rewards
            done_count += curr_dones
            terminate_count += infos['terminate']
            win_count += infos['win']
            lose_count += infos['lose']
            draw_count += infos['draw']
            amp_obs = infos['amp_obs']
            curr_disc_reward = self._calc_disc_reward(amp_obs)
            # NOTE(review): only element [0, 0] of the discriminator reward is
            # kept (a host scalar), while every other accumulator stays a
            # per-env tensor — confirm this is intentional.
            curr_disc_reward = curr_disc_reward[0, 0].cpu().numpy()
            disc_rewards += curr_disc_reward
            # First num_actors rows of the stacked obs are the ego agents.
            obs = obs_all[:self.num_actors]
            obs_op = obs_all[self.num_actors:]
        # Average rewards over the sub-steps.
        rewards /= self._llc_steps
        disc_rewards /= self._llc_steps
        # Collapse the per-sub-step counts into 0/1 flags.
        dones = torch.zeros_like(done_count)
        dones[done_count > 0] = 1.0
        terminate = torch.zeros_like(terminate_count)
        terminate[terminate_count > 0] = 1.0
        # `infos` from the last sub-step is reused as the return value,
        # with the aggregated flags written back into it.
        infos['terminate'] = terminate
        infos['disc_rewards'] = disc_rewards
        wins = torch.zeros_like(win_count)
        wins[win_count > 0] = 1.0
        infos['win'] = wins
        loses = torch.zeros_like(lose_count)
        loses[lose_count > 0] = 1.0
        infos['lose'] = loses
        draws = torch.zeros_like(draw_count)
        draws[draw_count > 0] = 1.0
        infos['draw'] = draws
        next_obs_dict = {}
        next_obs_dict['obs_op'] = obs_op
        next_obs_dict['obs'] = obs
        if self.value_size > 1:
            rewards = rewards[0]
        if self.is_tensor_obses:
            return self.obs_to_torch(next_obs_dict), rewards.cpu(), dones.cpu(), infos
        else:
            if np.isscalar(dones):
                rewards = np.expand_dims(np.asarray(rewards), 0)
                dones = np.expand_dims(np.asarray(dones), 0)
            return next_obs_dict, rewards, dones, infos
def create_model(self):
model = self.network.build(self.base_model_config)
model.to(self.device)
return model
    def load_model(self, fn):
        """Build a network and load weights (plus input-normalization stats) from checkpoint `fn`."""
        model = self.create_model()
        checkpoint = load_checkpoint(fn, device=self.device)
        # NOTE(review): `load_check` is resolved from this file's imports (not
        # visible in this chunk) — presumably it validates/adapts the checkpoint
        # for the normalization settings; confirm it is actually defined.
        checkpoint = load_check(checkpoint, normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        model.load_state_dict(checkpoint['model'])
        if self.normalize_input and 'running_mean_std' in checkpoint:
            model.running_mean_std.load_state_dict(checkpoint['running_mean_std'])
        return model
================================================
FILE: timechamber/learning/pfsp_player_pool.py
================================================
import collections
import random
import torch
import torch.multiprocessing as mp
import dill
# import time
from rl_games.algos_torch import model_builder
from concurrent.futures import ThreadPoolExecutor, as_completed, wait, ALL_COMPLETED
def player_inference_thread(model, input_dict, res_dict, env_indices, processed_obs):
    """Run one player's model on its slice of the batch.

    Writes the model outputs for `env_indices` into the shared `res_dict`
    buffers and returns the raw output dict; returns None when the player
    currently owns no envs.
    """
    if len(env_indices) == 0:
        return None
    input_dict['obs'] = processed_obs[env_indices]
    outputs = model(input_dict)
    for key, buffer in res_dict.items():
        buffer[env_indices] = outputs[key]
    return outputs
def player_inference_process(pipe, queue, barrier):
    """Worker-process loop serving one player's model.

    Protocol (commands arrive on `pipe`, payloads on `queue`):
      'init'      -> receive a dill-serialized model and install it
      'forward'   -> receive (obs, actions, values, env_indices), run the
                     model on the indexed slice and write into the shared
                     actions/values tensors
      'terminate' -> exit the loop
      anything else just rendezvouses at the barrier.
    The barrier keeps the main process and all workers in lockstep.
    """
    input_dict = {
        'is_train': False,
        'prev_actions': None,
        'obs': None,
        'rnn_states': None,
    }
    model = None
    barrier.wait()  # signal startup complete
    while True:
        task = pipe.recv()['task']
        if task == 'init':
            if model is not None:
                del model
            model = dill.loads(queue.get())
            barrier.wait()
        elif task == 'forward':
            obs, actions, values, env_indices = queue.get()
            input_dict['obs'] = obs[env_indices]
            out_dict = model(input_dict)
            actions[env_indices] = out_dict['actions']
            values[env_indices] = out_dict['values']
            barrier.wait()
            # Drop references to the shared tensors promptly.
            del obs, actions, values, env_indices
        elif task == 'terminate':
            break
        else:
            barrier.wait()
class SinglePlayer:
    """One (frozen) policy plus its bookkeeping.

    Tracks which env slots the player currently controls (via a boolean mask
    over the opponent obs batch) and exponentially-decayed win/lose/draw
    counters used for PFSP sampling. `rating` is only set when provided
    (used by the evaluation player for Elo tracking).
    """

    def __init__(self, player_idx, model, device, obs_batch_len=0, rating=None):
        self.model = model
        if model:
            self.model.eval()
        self.player_idx = player_idx
        self.device = device
        self._decay = 0.998  # per-update decay of the game counters
        self._games = torch.tensor(0, device=device, dtype=torch.float)
        self._wins = torch.tensor(0, device=device, dtype=torch.float)
        self._loses = torch.tensor(0, device=device, dtype=torch.float)
        self._draws = torch.tensor(0, device=device, dtype=torch.float)
        self._has_env = torch.zeros((obs_batch_len,), device=device, dtype=torch.bool)
        self.env_indices = torch.tensor([], device=device, dtype=torch.long, requires_grad=False)
        if rating:
            self.rating = rating

    def __call__(self, input_dict):
        return self.model(input_dict)

    def reset_envs(self):
        # nonzero(as_tuple=True) yields a 1-tuple of index tensors, which is
        # what the pool's inference code indexes with.
        self.env_indices = self._has_env.nonzero(as_tuple=True)

    def remove_envs(self, env_indices):
        self._has_env[env_indices] = False

    def add_envs(self, env_indices):
        self._has_env[env_indices] = True

    def clear_envs(self):
        self.env_indices = torch.tensor([], device=self.device, dtype=torch.long, requires_grad=False)

    def update_metric(self, wins, loses, draws):
        """Fold this step's outcomes (for the envs this player owns) into the decayed counters."""
        won = torch.sum(wins[self.env_indices])
        lost = torch.sum(loses[self.env_indices])
        drawn = torch.sum(draws[self.env_indices])
        for counter in (self._games, self._wins, self._loses, self._draws):
            counter *= self._decay
        self._games += won + lost + drawn
        self._wins += won
        self._loses += lost
        self._draws += drawn

    def clear_metric(self):
        self._games = torch.tensor(0, device=self.device, dtype=torch.float)
        self._wins = torch.tensor(0, device=self.device, dtype=torch.float)
        self._loses = torch.tensor(0, device=self.device, dtype=torch.float)
        self._draws = torch.tensor(0, device=self.device, dtype=torch.float)

    def win_rate(self):
        """Decayed win rate (draws count half); 0 for the empty slot, 0.5 before any game."""
        if self.model is None:
            return 0
        if self._games == 0:
            return 0.5
        return (self._wins + 0.5 * self._draws) / self._games

    def games_num(self):
        return self._games
class PFSPPlayerPool:
    """Fixed-capacity ring buffer of SinglePlayer policies with PFSP sampling.

    Once full, add_player overwrites slots round-robin. sample_player draws
    a player with probability proportional to a prioritized-fictitious-
    self-play weighting of its win rate (harder opponents are preferred).
    """

    def __init__(self, max_length, device):
        assert max_length > 0
        self.players = []
        self.max_length = max_length
        self.idx = 0  # next slot to overwrite once the pool is full
        self.device = device
        # PFSP weighting functions of the opponent's win rate.
        self.weightings = {
            "variance": lambda x: x * (1 - x),
            "linear": lambda x: 1 - x,
            "squared": lambda x: (1 - x) ** 2,
        }

    def add_player(self, player):
        if len(self.players) < self.max_length:
            self.players.append(player)
        else:
            self.players[self.idx] = player
        self.idx = (self.idx + 1) % self.max_length

    def sample_player(self, weight='linear'):
        """Draw one player, weighted by the chosen PFSP function of win rate."""
        score = self.weightings[weight]
        weights = [score(p.win_rate()) for p in self.players]
        return random.choices(self.players, weights=weights)[0]

    def update_player_metric(self, infos):
        for p in self.players:
            p.update_metric(infos['win'], infos['lose'], infos['draw'])

    def clear_player_metric(self):
        for p in self.players:
            p.clear_metric()

    def inference(self, input_dict, res_dict, processed_obs):
        """Run every player's model on the env rows it owns, scattering into res_dict."""
        for p in self.players:
            if len(p.env_indices[0]) == 0:
                continue
            input_dict['obs'] = processed_obs[p.env_indices]
            out_dict = p(input_dict)
            for key in res_dict:
                res_dict[key][p.env_indices] = out_dict[key]
class PFSPPlayerVectorizedPool(PFSPPlayerPool):
    """Pool variant that batches all stored policies into one vectorized model.

    A single `vectorized_a2c` model holds `max_length` parameter sets; each
    pool slot's obs are gathered into a (population, num_envs, obs_dim)
    buffer so one forward pass serves every player at once.
    """

    def __init__(self, max_length, device, vector_model_config, params):
        super().__init__(max_length, device)
        # Force the vectorized model/network implementations.
        params['model']['name'] = 'vectorized_a2c'
        params['network']['name'] = 'vectorized_a2c'
        builder = model_builder.ModelBuilder()
        self.vectorized_network = builder.load(params)
        self.vectorized_model = self.vectorized_network.build(vector_model_config)
        self.vectorized_model.to(self.device)
        self.vectorized_model.eval()
        obs_shape = (self.max_length, vector_model_config["num_envs"],
                     vector_model_config['input_shape'][0])
        self.obs = torch.zeros(obs_shape, dtype=torch.float32, device=self.device)
        # Pre-fill every slot with an empty placeholder player.
        for slot in range(max_length):
            self.add_player(SinglePlayer(slot, None, self.device, vector_model_config["num_envs"]))

    def inference(self, input_dict, res_dict, processed_obs):
        # Gather each player's rows into its slice of the batched obs buffer.
        for slot, player in enumerate(self.players):
            self.obs[slot][player.env_indices] = processed_obs[player.env_indices]
        input_dict['obs'] = self.obs
        out_dict = self.vectorized_model(input_dict)
        # Scatter each slot's outputs back to the rows that player owns.
        for slot, player in enumerate(self.players):
            if len(player.env_indices) == 0:
                continue
            for key in res_dict:
                res_dict[key][player.env_indices] = out_dict[key][slot][player.env_indices]

    def add_player(self, player):
        # Install the player's weights into slot self.idx of the vectorized model.
        if player.model:
            self.vectorized_model.update(self.idx, player.model)
        super().add_player(player)
class PFSPPlayerThreadPool(PFSPPlayerPool):
    """Pool variant that runs each stored policy's forward pass on a worker thread."""

    def __init__(self, max_length, device):
        super().__init__(max_length, device)
        self.thread_pool = ThreadPoolExecutor(max_workers=self.max_length)

    def inference(self, input_dict, res_dict, processed_obs):
        """Dispatch one inference task per player and block until all complete.

        Fixes two defects of the original Executor.map call: it never waited
        for the workers, so callers could read res_dict before the threads
        had written into it, and any worker exception was silently dropped
        with the unconsumed result iterator. Each worker also gets a shallow
        copy of input_dict, since player_inference_thread mutates its 'obs'
        entry (the shared dict was a data race between threads).
        """
        futures = [
            self.thread_pool.submit(player_inference_thread, player.model,
                                    dict(input_dict), res_dict,
                                    player.env_indices, processed_obs)
            for player in self.players
        ]
        wait(futures, return_when=ALL_COMPLETED)
        for future in futures:
            future.result()  # re-raise any worker exception
class PFSPPlayerProcessPool(PFSPPlayerPool):
    """Pool variant that serves each stored policy from its own worker process.

    Workers are long-lived `player_inference_process` loops. Commands travel
    over a per-worker Pipe, payloads (serialized models, tensors) over a
    per-worker Queue, and a Barrier with max_length + 1 parties keeps the
    main process and all workers in lockstep.
    """

    def __init__(self, max_length, device):
        super(PFSPPlayerProcessPool, self).__init__(max_length, device)
        self.inference_processes = []
        self.queues = []           # one payload queue per worker
        self.producer_pipes = []   # main-process ends (commands out)
        self.consumer_pipes = []   # worker ends (commands in)
        # max_length workers + the main process.
        self.barrier = mp.Barrier(self.max_length + 1)
        mp.set_start_method(method='spawn', force=True)
        self._init_inference_processes()

    def _init_inference_processes(self):
        # Spawn one daemon worker per pool slot, each with its own queue/pipe.
        for _ in range(self.max_length):
            queue = mp.Queue()
            self.queues.append(queue)
            pipe_read, pipe_write = mp.Pipe(duplex=False)
            self.producer_pipes.append(pipe_write)
            self.consumer_pipes.append(pipe_read)
            process = mp.Process(target=player_inference_process,
                                 args=(pipe_read, queue, self.barrier),
                                 daemon=True)
            self.inference_processes.append(process)
            process.start()
        # Rendezvous with every worker's startup barrier.wait().
        self.barrier.wait()

    def add_player(self, player):
        """Serialize the player's model and install it in worker slot self.idx."""
        with torch.no_grad():
            model = dill.dumps(player.model)
        for i in range(self.max_length):
            if i == self.idx:
                self.producer_pipes[i].send({'task': 'init'})
                self.queues[i].put(model)
            else:
                # Every other worker just rendezvouses at the barrier.
                self.producer_pipes[i].send({'task': 'continue'})
        self.barrier.wait()
        if len(self.players) < self.max_length:
            self.players.append(player)
        else:
            self.players[self.idx] = player
        self.idx += 1
        self.idx %= self.max_length

    def inference(self, input_dict, res_dict, processed_obs):
        # Dispatch a forward pass to every worker whose player owns envs;
        # idle workers get 'continue' so the barrier count still matches.
        # NOTE(review): unlike add_player, the main process never calls
        # barrier.wait() here, so res_dict may not yet be filled when this
        # returns — confirm the caller synchronizes before reading results.
        for i in range(self.max_length):
            if i < len(self.players) and len(self.players[i].env_indices):
                self.producer_pipes[i].send({'task': 'forward'})
                self.queues[i].put(
                    (processed_obs, res_dict['actions'],
                     res_dict['values'], self.players[i].env_indices))
            else:
                self.producer_pipes[i].send({'task': 'continue'})

    def __del__(self):
        # Ask every worker to leave its loop, then reap the processes.
        for pipe in self.producer_pipes:
            pipe.send({'task': 'terminate'})
        for process in self.inference_processes:
            process.join()
================================================
FILE: timechamber/learning/ppo_sp_agent.py
================================================
# License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE]
import copy
from datetime import datetime
from gym import spaces
import numpy as np
import os
import time
from .pfsp_player_pool import PFSPPlayerPool, SinglePlayer, PFSPPlayerThreadPool, PFSPPlayerProcessPool, \
PFSPPlayerVectorizedPool
from timechamber.utils.utils import load_checkpoint
from rl_games.algos_torch import a2c_continuous
from rl_games.common.a2c_common import swap_and_flatten01
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import central_value
import torch
from torch import optim
from tensorboardX import SummaryWriter
import torch.distributed as dist
class SPAgent(a2c_continuous.A2CAgent):
    """PPO self-play agent.

    Extends rl_games' continuous A2C/PPO agent with a pool of frozen
    opponent policies (prioritized fictitious self-play): every env pits the
    learning policy against an opponent sampled from the pool, and a snapshot
    of the live policy is added to the pool once its aggregate win rate is
    high enough (or after max_update_steps epochs).
    """

    def __init__(self, base_name, params):
        params['config']['device'] = params['device']
        super().__init__(base_name, params)
        self.player_pool_type = params['player_pool_type']
        # Config used to build frozen copies of the policy network.
        self.base_model_config = {
            'actions_num': self.actions_num,
            'input_shape': self.obs_shape,
            'num_seqs': self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        self.max_his_player_num = params['player_pool_length']
        if params['op_load_path']:
            # Seed the opponent side from a pretrained checkpoint.
            self.init_op_model = self.create_model()
            self.restore_op(params['op_load_path'])
        else:
            # Otherwise the first opponent is (a copy of) the learning policy.
            self.init_op_model = self.model
        self.players_dir = os.path.join(self.experiment_dir, 'policy_dir')
        os.makedirs(self.players_dir, exist_ok=True)
        self.update_win_rate = params['update_win_rate']
        self.num_opponent_agents = params['num_agents'] - 1
        self.player_pool = self._build_player_pool(params)
        self.games_to_check = params['games_to_check']
        self.now_update_steps = 0
        self.max_update_steps = params['max_update_steps']
        self.update_op_num = 0
        self.update_player_pool(self.init_op_model, player_idx=self.update_op_num)
        # Assign an opponent to every env before training starts.
        self.resample_op(torch.arange(end=self.num_actors, device=self.device, dtype=torch.long))
        assert self.num_actors % self.max_his_player_num == 0

    def _build_player_pool(self, params):
        """Create the opponent pool of the configured type.

        NOTE(review): the labels look swapped — 'multi_thread' builds the
        process-based pool and 'multi_process' the thread-based one. The
        evaluation player (ppo_sp_player.py) uses the same mapping, so
        renaming would change behavior for existing configs; confirm intent.
        """
        if self.player_pool_type == 'multi_thread':
            return PFSPPlayerProcessPool(max_length=self.max_his_player_num,
                                         device=self.device)
        elif self.player_pool_type == 'multi_process':
            return PFSPPlayerThreadPool(max_length=self.max_his_player_num,
                                        device=self.device)
        elif self.player_pool_type == 'vectorized':
            # One vectorized model that holds every pooled policy.
            vector_model_config = self.base_model_config
            vector_model_config['num_envs'] = self.num_actors * self.num_opponent_agents
            vector_model_config['population_size'] = self.max_his_player_num
            return PFSPPlayerVectorizedPool(max_length=self.max_his_player_num, device=self.device,
                                            vector_model_config=vector_model_config, params=params)
        else:
            return PFSPPlayerPool(max_length=self.max_his_player_num, device=self.device)

    def play_steps(self):
        """Collect horizon_length transitions of self-play rollouts.

        Ego actions come from the live model, opponent actions from the
        frozen pool; only the ego half of each transition is stored in the
        experience buffer. Returns the batch_dict consumed by the PPO update.
        """
        update_list = self.update_list
        step_time = 0.0
        env_done_indices = torch.tensor([], device=self.device, dtype=torch.long)
        for n in range(self.horizon_length):
            # Reset only the envs that finished on the previous step.
            self.obs = self.env_reset(env_done_indices)
            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                # Opponent inference first (pool), then the live policy.
                res_dict_op = self.get_action_values(self.obs, is_op=True)
                res_dict = self.get_action_values(self.obs)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)
            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])
            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])
            # NOTE(review): this shuts the executor down on every step for the
            # 'multi_thread' type, which _build_player_pool maps to the
            # process pool (no thread_pool attribute) — confirm this path.
            if self.player_pool_type == 'multi_thread':
                self.player_pool.thread_pool.shutdown()
            step_time_start = time.time()
            # Env expects ego actions stacked before opponent actions.
            self.obs, rewards, self.dones, infos = self.env_step(
                torch.cat((res_dict['actions'], res_dict_op['actions']), dim=0))
            step_time_end = time.time()
            step_time += (step_time_end - step_time_start)
            shaped_rewards = self.rewards_shaper(rewards)
            if self.value_bootstrap and 'time_outs' in infos:
                # Bootstrap value for episodes cut off by the time limit.
                shaped_rewards += self.gamma * res_dict['values'] * self.cast_obs(infos['time_outs']).unsqueeze(
                    1).float()
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            # An env counts as done only when all of its agents are done.
            env_done_indices = self.dones.view(self.num_actors, self.num_agents).all(dim=1).nonzero(as_tuple=False)
            # print(f"env done indices: {env_done_indices}")
            # print(f"self.dones {self.dones}")
            self.game_rewards.update(self.current_rewards[env_done_indices])
            self.game_lengths.update(self.current_lengths[env_done_indices])
            self.algo_observer.process_infos(infos, env_done_indices)
            not_dones = 1.0 - self.dones.float()
            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones
            # Feed win/lose/draw outcomes to the pool and re-deal opponents
            # for the envs that just finished.
            self.player_pool.update_player_metric(infos=infos)
            self.resample_op(all_done_indices.flatten())
            env_done_indices = env_done_indices[:, 0]
        last_values = self.get_values(self.obs)
        fdones = self.dones.float()
        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        # GAE over the collected horizon.
        mb_advs = self.discount_values(fdones, last_values, mb_fdones, mb_values, mb_rewards)
        mb_returns = mb_advs + mb_values
        batch_dict = self.experience_buffer.get_transformed_list(swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size
        batch_dict['step_time'] = step_time
        return batch_dict

    def env_step(self, actions):
        """Step the vec env with stacked ego+opponent actions; split obs back into halves."""
        actions = self.preprocess_actions(actions)
        obs, rewards, dones, infos = self.vec_env.step(actions)
        # First num_actors rows of the observation belong to the ego agents.
        obs['obs_op'] = obs['obs'][self.num_actors:]
        obs['obs'] = obs['obs'][:self.num_actors]
        if self.is_tensor_obses:
            if self.value_size == 1:
                rewards = rewards.unsqueeze(1)
            return self.obs_to_tensors(obs), rewards.to(self.ppo_device), dones.to(self.ppo_device), infos
        else:
            if self.value_size == 1:
                rewards = np.expand_dims(rewards, axis=1)
            return self.obs_to_tensors(obs), torch.from_numpy(rewards).to(self.ppo_device).float(), torch.from_numpy(
                dones).to(self.ppo_device), infos

    def env_reset(self, env_ids=None):
        """Reset (optionally a subset of) envs and split obs into ego/opponent halves."""
        obs = self.vec_env.reset(env_ids)
        obs = self.obs_to_tensors(obs)
        obs['obs_op'] = obs['obs'][self.num_actors:]
        obs['obs'] = obs['obs'][:self.num_actors]
        return obs

    def train(self):
        """Main loop: collect self-play rollouts, run PPO epochs, log, checkpoint, refresh the pool."""
        self.init_tensors()
        self.mean_rewards = self.last_mean_rewards = -100500
        start_time = time.time()
        total_time = 0
        rep_count = 0
        # self.frame = 0 # loading from checkpoint
        self.obs = self.env_reset()
        if self.multi_gpu:
            # Rank 0 broadcasts its weights so all ranks start identical.
            torch.cuda.set_device(self.rank)
            print("====================broadcasting parameters")
            model_params = [self.model.state_dict()]
            dist.broadcast_object_list(model_params, 0)
            self.model.load_state_dict(model_params[0])
        while True:
            epoch_num = self.update_epoch()
            step_time, play_time, update_time, sum_time, a_losses, c_losses, b_losses, entropies, kls, last_lr, lr_mul = self.train_epoch()
            # cleaning memory to optimize space
            self.dataset.update_values_dict(None)
            total_time += sum_time
            curr_frames = self.curr_frames * self.rank_size if self.multi_gpu else self.curr_frames
            self.frame += curr_frames
            should_exit = False
            if self.rank == 0:
                # Logging / checkpointing happens on rank 0 only.
                self.diagnostics.epoch(self, current_epoch=epoch_num)
                scaled_time = self.num_agents * sum_time
                scaled_play_time = self.num_agents * play_time
                frame = self.frame // self.num_agents
                if self.print_stats:
                    step_time = max(step_time, 1e-6)
                    fps_step = curr_frames / step_time
                    fps_step_inference = curr_frames / scaled_play_time
                    fps_total = curr_frames / scaled_time
                    print(
                        f'fps step: {fps_step:.0f} fps step and policy inference: {fps_step_inference:.0f} fps total: {fps_total:.0f} epoch: {epoch_num}/{self.max_epochs}')
                self.write_stats(total_time, epoch_num, step_time, play_time, update_time, a_losses, c_losses,
                                 entropies, kls, last_lr, lr_mul, frame, scaled_time, scaled_play_time, curr_frames)
                self.algo_observer.after_print_stats(frame, epoch_num, total_time)
                if self.game_rewards.current_size > 0:
                    mean_rewards = self.game_rewards.get_mean()
                    mean_lengths = self.game_lengths.get_mean()
                    self.mean_rewards = mean_rewards[0]
                    for i in range(self.value_size):
                        rewards_name = 'rewards' if i == 0 else 'rewards{0}'.format(i)
                        self.writer.add_scalar(rewards_name + '/step'.format(i), mean_rewards[i], frame)
                        self.writer.add_scalar(rewards_name + '/iter'.format(i), mean_rewards[i], epoch_num)
                        self.writer.add_scalar(rewards_name + '/time'.format(i), mean_rewards[i], total_time)
                    self.writer.add_scalar('episode_lengths/step', mean_lengths, frame)
                    self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num)
                    self.writer.add_scalar('episode_lengths/time', mean_lengths, total_time)
                    # removed equal signs (i.e. "rew=") from the checkpoint name since it messes with hydra CLI parsing
                    checkpoint_name = self.config['name'] + '_ep_' + str(epoch_num) + '_rew_' + str(mean_rewards[0])
                    if self.save_freq > 0:
                        # NOTE(review): `mean_rewards` is an array here, so `<=`
                        # against the scalar yields an array — its truth value is
                        # ambiguous if value_size > 1. Confirm value_size == 1.
                        if (epoch_num % self.save_freq == 0) and (mean_rewards <= self.last_mean_rewards):
                            self.save(os.path.join(self.nn_dir, 'last_' + checkpoint_name))
                    if mean_rewards[0] > self.last_mean_rewards and epoch_num >= self.save_best_after:
                        print('saving next best rewards: ', mean_rewards)
                        self.last_mean_rewards = mean_rewards[0]
                        self.save(os.path.join(self.nn_dir, self.config['name']))
                        if 'score_to_win' in self.config:
                            if self.last_mean_rewards > self.config['score_to_win']:
                                print('Network won!')
                                self.save(os.path.join(self.nn_dir, checkpoint_name))
                                should_exit = True
                if epoch_num >= self.max_epochs:
                    if self.game_rewards.current_size == 0:
                        print('WARNING: Max epochs reached before any env terminated at least once')
                        mean_rewards = -np.inf
                    self.save(os.path.join(self.nn_dir,
                                           'last_' + self.config['name'] + 'ep' + str(epoch_num) + 'rew' + str(
                                               mean_rewards)))
                    print('MAX EPOCHS NUM!')
                    should_exit = True
                # Update win-rate stats and possibly snapshot a new opponent.
                self.update_metric()
                update_time = 0
            if self.multi_gpu:
                # All ranks must agree on termination.
                should_exit_t = torch.tensor(should_exit, device=self.device).float()
                dist.broadcast(should_exit_t, 0)
                should_exit = should_exit_t.bool().item()
            if should_exit:
                return self.last_mean_rewards, epoch_num

    def update_metric(self):
        """Log per-opponent and aggregate win rates; trigger a pool update when due."""
        tot_win_rate = 0
        tot_games_num = 0
        self.now_update_steps += 1
        # self_player process
        for player in self.player_pool.players:
            win_rate = player.win_rate()
            games = player.games_num()
            self.writer.add_scalar(f'rate/win_rate_player_{player.player_idx}', win_rate, self.epoch_num)
            tot_win_rate += win_rate * games
            tot_games_num += games
        # Games-weighted average win rate across the whole pool.
        win_rate = tot_win_rate / tot_games_num
        if tot_games_num > self.games_to_check:
            self.check_update_opponent(win_rate)
        self.writer.add_scalar('rate/win_rate', win_rate, self.epoch_num)

    def get_action_values(self, obs, is_op=False):
        """Compute actions/values: opponent pool when is_op, live model otherwise."""
        processed_obs = self._preproc_obs(obs['obs_op'] if is_op else obs['obs'])
        if not is_op:
            self.model.eval()
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs': processed_obs,
            'rnn_states': self.rnn_states
        }
        with torch.no_grad():
            if is_op:
                # Pre-allocated outputs; each pooled player fills its env slice.
                res_dict = {
                    "actions": torch.zeros((self.num_actors * self.num_opponent_agents, self.actions_num),
                                           device=self.device),
                    "values": torch.zeros((self.num_actors * self.num_opponent_agents, 1), device=self.device)
                }
                self.player_pool.inference(input_dict, res_dict, processed_obs)
            else:
                res_dict = self.model(input_dict)
            if self.has_central_value:
                states = obs['states']
                input_dict = {
                    'is_train': False,
                    'states': states,
                }
                value = self.get_central_value(input_dict)
                res_dict['values'] = value
        return res_dict

    def resample_op(self, resample_indices):
        """Re-assign a PFSP-sampled pool opponent to every opponent slot of the given envs."""
        # Free all slots of the affected envs first ...
        for op_idx in range(self.num_opponent_agents):
            for player in self.player_pool.players:
                player.remove_envs(resample_indices + op_idx * self.num_actors)
        # ... then sample a new owner per slot.
        for op_idx in range(self.num_opponent_agents):
            for env_idx in resample_indices:
                player = self.player_pool.sample_player()
                player.add_envs(env_idx + op_idx * self.num_actors)
        for player in self.player_pool.players:
            player.reset_envs()

    def resample_batch(self):
        """Re-deal opponent env slots to PFSP-sampled players in contiguous chunks."""
        env_indices = torch.arange(end=self.num_actors * self.num_opponent_agents,
                                   device=self.device, dtype=torch.long,
                                   requires_grad=False)
        # NOTE(review): chunk size assumes num_actors >= 32 and divisible by
        # 32; the loop also only covers the first num_actors slots and never
        # calls reset_envs — confirm this helper is still in use.
        step = self.num_actors // 32
        for player in self.player_pool.players:
            player.clear_envs()
        for i in range(0, self.num_actors, step):
            player = self.player_pool.sample_player()
            player.add_envs(env_indices[i:i + step])
        print("resample done")

    def restore_op(self, fn):
        """Load the initial opponent's weights (and normalization stats) from checkpoint `fn`."""
        checkpoint = load_checkpoint(fn, device=self.device)
        self.init_op_model.load_state_dict(checkpoint['model'])
        if self.normalize_input and 'running_mean_std' in checkpoint:
            self.init_op_model.running_mean_std.load_state_dict(checkpoint['running_mean_std'])

    def check_update_opponent(self, win_rate):
        """Snapshot the live policy into the pool when it beats the pool often enough.

        Triggers either on win_rate exceeding update_win_rate or after
        max_update_steps epochs without an update; clears pool metrics and
        re-deals opponents for all envs afterwards.
        """
        if win_rate > self.update_win_rate or self.now_update_steps > self.max_update_steps:
            print(f'winrate:{win_rate},add opponent to player pool')
            self.update_op_num += 1
            self.now_update_steps = 0
            self.update_player_pool(self.model, player_idx=self.update_op_num)
            self.player_pool.clear_player_metric()
            self.resample_op(torch.arange(end=self.num_actors, device=self.device, dtype=torch.long))
            self.save(os.path.join(self.players_dir, f'policy_{self.update_op_num}'))

    def create_model(self):
        """Build a fresh (untrained) policy network on the training device."""
        model = self.network.build(self.base_model_config)
        model.to(self.device)
        return model

    def update_player_pool(self, model, player_idx):
        """Deep-copy `model` into a frozen SinglePlayer and add it to the pool."""
        new_model = self.create_model()
        new_model.load_state_dict(copy.deepcopy(model.state_dict()))
        if hasattr(model, 'running_mean_std'):
            new_model.running_mean_std.load_state_dict(copy.deepcopy(model.running_mean_std.state_dict()))
        player = SinglePlayer(player_idx, new_model, self.device, self.num_actors * self.num_opponent_agents)
        self.player_pool.add_player(player)
================================================
FILE: timechamber/learning/ppo_sp_player.py
================================================
# License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE]
import os
import time
import torch
import numpy as np
from rl_games.algos_torch import players
import random
from rl_games.algos_torch import torch_ext
from rl_games.common.tr_helpers import unsqueeze_obs
from rl_games.common.player import BasePlayer
from .pfsp_player_pool import PFSPPlayerPool, PFSPPlayerVectorizedPool, PFSPPlayerThreadPool, PFSPPlayerProcessPool, \
SinglePlayer
import matplotlib.pyplot as plt
from multielo import MultiElo
def rescale_actions(low, high, action):
    """Affinely map `action` from [-1, 1] into [low, high]."""
    half_range = (high - low) / 2.0
    midpoint = (high + low) / 2.0
    return action * half_range + midpoint
class SPPlayer(BasePlayer):
    def __init__(self, params):
        """Evaluation-time self-play player.

        Loads one or more ego checkpoints and one or more opponent
        checkpoints into separate player pools, pits them against each other
        across the vec env, and (optionally) tracks Elo ratings.
        """
        params['config']['device_name'] = params['device']
        super().__init__(params)
        print(f'params:{params}')
        self.network = self.config['network']
        self.actions_num = self.action_space.shape[0]
        self.actions_low = torch.from_numpy(self.action_space.low.copy()).float().to(self.device)
        self.actions_high = torch.from_numpy(self.action_space.high.copy()).float().to(self.device)
        self.mask = [False]
        self.is_rnn = False
        self.normalize_input = self.config['normalize_input']
        self.normalize_value = self.config.get('normalize_value', False)
        # Config used to rebuild policy networks from checkpoints.
        self.base_model_config = {
            'actions_num': self.actions_num,
            'input_shape': self.obs_shape,
            'num_seqs': self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        self.policy_timestep = []     # checkpoint mtimes for the ego side
        self.policy_op_timestep = []  # checkpoint mtimes for the opponent side
        self.params = params
        self.record_elo = self.player_config.get('record_elo', False)
        self.init_elo = self.player_config.get('init_elo', 400)
        self.num_actors = params['config']['num_actors']
        self.player_pool_type = params['player_pool_type']
        self.player_pool = None
        self.op_player_pool = None
        self.num_opponents = params['num_agents'] - 1
        self.max_steps = 1000
        self.update_op_num = 0
        self.players_per_env = []  # per env: [ego_player, op_player, ...]
        self.elo = MultiElo()
    def restore(self, load_dir):
        """Load ego policies from a single checkpoint file or a directory of checkpoints.

        Directory entries are ordered by file modification time and
        player_idx becomes the rank in that order. Afterwards loads the
        opponent side, normalizes timesteps, and assigns players to envs.
        """
        if os.path.isdir(load_dir):
            self.player_pool = self._build_player_pool(params=self.params, player_num=len(os.listdir(load_dir)))
            print('dir:', load_dir)
            sorted_players = []
            for idx, policy_check_checkpoint in enumerate(os.listdir(load_dir)):
                model_timestep = os.path.getmtime(load_dir + '/' + str(policy_check_checkpoint))
                self.policy_timestep.append(model_timestep)
                model = self.load_model(load_dir + '/' + str(policy_check_checkpoint))
                # player_idx temporarily holds the mtime so players can be
                # sorted oldest-first below.
                new_player = SinglePlayer(player_idx=model_timestep, model=model, device=self.device,
                                          rating=self.init_elo, obs_batch_len=self.num_actors * self.num_opponents)
                sorted_players.append(new_player)
            sorted_players.sort(key=lambda player: player.player_idx)
            for idx, player in enumerate(sorted_players):
                player.player_idx = idx
                self.player_pool.add_player(player)
            self.policy_timestep.sort()
        else:
            # Single checkpoint file.
            self.player_pool = self._build_player_pool(params=self.params, player_num=1)
            self.policy_timestep.append(os.path.getmtime(load_dir))
            model = self.load_model(load_dir)
            new_player = SinglePlayer(player_idx=0, model=model, device=self.device,
                                      rating=self.init_elo, obs_batch_len=self.num_actors * self.num_opponents)
            self.player_pool.add_player(new_player)
        self.restore_op(self.params['op_load_path'])
        self._norm_policy_timestep()
        self._alloc_env_indices()
def restore_op(self, load_dir):
if os.path.isdir(load_dir):
self.op_player_pool = self._build_player_pool(params=self.params, player_num=len(os.listdir(load_dir)))
sorted_players = []
for idx, policy_check_checkpoint in enumerate(os.listdir(load_dir)):
model_timestep = os.path.getmtime(load_dir + '/' + str(policy_check_checkpoint))
self.policy_op_timestep.append(model_timestep)
model = self.load_model(load_dir + '/' + str(policy_check_checkpoint))
new_player = SinglePlayer(player_idx=model_timestep, model=model, device=self.device,
rating=self.init_elo, obs_batch_len=self.num_actors * self.num_opponents)
sorted_players.append(new_player)
sorted_players.sort(key=lambda player: player.player_idx)
for idx, player in enumerate(sorted_players):
player.player_idx = idx
self.op_player_pool.add_player(player)
self.policy_op_timestep.sort()
else:
self.op_player_pool = self._build_player_pool(params=self.params, player_num=1)
self.policy_op_timestep.append(os.path.getmtime(load_dir))
model = self.load_model(load_dir)
new_player = SinglePlayer(player_idx=0, model=model, device=self.device,
rating=400, obs_batch_len=self.num_actors * self.num_opponents)
self.op_player_pool.add_player(new_player)
def _alloc_env_indices(self):
for idx in range(self.num_actors):
player_idx = random.randint(0, len(self.player_pool.players) - 1)
self.player_pool.players[player_idx].add_envs(torch.tensor([idx], dtype=torch.long, device=self.device))
env_player = [self.player_pool.players[player_idx]]
for op_idx in range(self.num_opponents):
op_player_idx = random.randint(0, len(self.op_player_pool.players) - 1)
self.op_player_pool.players[op_player_idx].add_envs(
torch.tensor([idx + op_idx * self.num_actors], dtype=torch.long, device=self.device))
env_player.append(self.op_player_pool.players[op_player_idx])
self.players_per_env.append(env_player)
for player in self.player_pool.players:
player.reset_envs()
for player in self.op_player_pool.players:
player.reset_envs()
    def _build_player_pool(self, params, player_num):
        """Create a player pool of capacity `player_num` of the configured type.

        NOTE(review): the labels look swapped — 'multi_thread' builds the
        process-based pool and 'multi_process' the thread-based one (the
        training agent in ppo_sp_agent.py has the same mapping); confirm
        before renaming, since existing configs depend on it.
        """
        if self.player_pool_type == 'multi_thread':
            return PFSPPlayerProcessPool(max_length=player_num,
                                         device=self.device)
        elif self.player_pool_type == 'multi_process':
            return PFSPPlayerThreadPool(max_length=player_num,
                                        device=self.device)
        elif self.player_pool_type == 'vectorized':
            # One vectorized model that holds all `player_num` policies.
            vector_model_config = self.base_model_config
            vector_model_config['num_envs'] = self.num_actors * self.num_opponents
            vector_model_config['population_size'] = player_num
            return PFSPPlayerVectorizedPool(max_length=player_num, device=self.device,
                                            vector_model_config=vector_model_config, params=params)
        else:
            return PFSPPlayerPool(max_length=player_num, device=self.device)
    def _update_rating(self, info, env_indices):
        """Update Elo ratings for every finished env in `env_indices`.

        Two-player envs use the win/lose/draw flags from `info`; envs with
        more than one opponent use the per-env `ranks` tensor instead.
        """
        for env_idx in env_indices:
            if self.num_opponents == 1:
                player = self.players_per_env[env_idx][0]
                op_player = self.players_per_env[env_idx][1]
                # Ratings are passed winner-first; result_order=[1, 1] marks a tie.
                if info['win'][env_idx]:
                    player.rating, op_player.rating = self.elo.get_new_ratings([player.rating, op_player.rating])
                elif info['lose'][env_idx]:
                    op_player.rating, player.rating = self.elo.get_new_ratings([op_player.rating, player.rating])
                elif info['draw'][env_idx]:
                    player.rating, op_player.rating = self.elo.get_new_ratings([player.rating, op_player.rating],
                                                                               result_order=[1, 1])
            else:
                ranks = info['ranks'][env_idx].cpu().numpy()
                # Sort the env's players by placement so ratings align with results.
                players_sorted_by_rank = sorted(enumerate(self.players_per_env[env_idx]), key=lambda x: ranks[x[0]])
                sorted_ranks = sorted(ranks)
                now_ratings = [player.rating for idx, player in players_sorted_by_rank]
                new_ratings = self.elo.get_new_ratings(now_ratings, result_order=sorted_ranks)
                # print(now_ratings, new_ratings)
                # assert new_ratings[0] > 0 and new_ratings[1] > 0 and new_ratings[2] > 0
                for idx, new_rating in enumerate(new_ratings):
                    players_sorted_by_rank[idx][1].rating = new_rating
def run(self):
    """Main evaluation loop: play ``games_num`` games between the main policy
    and the opponent pool, accumulating rewards/steps and (optionally) Elo
    ratings, then plot the Elo curve when ``record_elo`` is set."""
    n_games = self.games_num
    render = self.render_env
    n_game_life = self.n_game_life
    is_determenistic = self.is_determenistic
    sum_rewards = 0
    sum_steps = 0
    sum_game_res = 0
    n_games = n_games * n_game_life
    games_played = 0
    has_masks = False
    has_masks_func = getattr(self.env, "has_action_mask", None) is not None
    if has_masks_func:
        has_masks = self.env.has_action_mask()
    print(f'games_num:{n_games}')
    need_init_rnn = self.is_rnn
    for _ in range(n_games):
        if games_played >= n_games:
            break
        obses = self.env_reset(self.env)
        batch_size = 1
        batch_size = self.get_batch_size(obses['obs'], batch_size)
        if need_init_rnn:
            # RNN hidden states are created once and zeroed per-env on done
            self.init_rnn()
            need_init_rnn = False
        cr = torch.zeros(batch_size, dtype=torch.float32, device=self.device)
        steps = torch.zeros(batch_size, dtype=torch.float32, device=self.device)
        print_game_res = False
        done_indices = torch.tensor([], device=self.device, dtype=torch.long)
        for n in range(self.max_steps):
            # only re-resets the envs that finished last step
            obses = self.env_reset(self.env, done_indices)
            if has_masks:
                masks = self.env.get_action_mask()
                # NOTE(review): this branch never assigns action_op, so the
                # torch.cat below would raise NameError when masks are enabled —
                # confirm masked envs are never used with this player.
                action = self.get_masked_action(
                    obses, masks, is_determenistic)
            else:
                action = self.get_action(obses['obs'], is_determenistic)
                action_op = self.get_action(obses['obs_op'], is_determenistic, is_op=True)
            # main-policy and opponent actions are stacked along the env axis
            obses, r, done, info = self.env_step(self.env, torch.cat((action, action_op), dim=0))
            cr += r
            steps += 1
            if render:
                self.env.render(mode='human')
                time.sleep(self.render_sleep)
            all_done_indices = done.nonzero(as_tuple=False)
            # one entry per finished game (not per agent)
            done_indices = all_done_indices[::self.num_agents]
            done_count = len(done_indices)
            games_played += done_count
            if self.record_elo:
                self._update_rating(info, all_done_indices.flatten())
            if done_count > 0:
                if self.is_rnn:
                    # zero hidden states of finished envs
                    for s in self.states:
                        s[:, all_done_indices, :] = s[:, all_done_indices, :] * 0.0
                cur_rewards = cr[done_indices].sum().item()
                cur_steps = steps[done_indices].sum().item()
                # reset accumulators for the envs that just finished
                cr = cr * (1.0 - done.float())
                steps = steps * (1.0 - done.float())
                sum_rewards += cur_rewards
                sum_steps += cur_steps
                game_res = 0.0
                if isinstance(info, dict):
                    if 'battle_won' in info:
                        print_game_res = True
                        game_res = info.get('battle_won', 0.5)
                    if 'scores' in info:
                        print_game_res = True
                        game_res = info.get('scores', 0.5)
                if self.print_stats:
                    if print_game_res:
                        print('reward:', cur_rewards / done_count,
                              'steps:', cur_steps / done_count, 'w:', game_res)
                    else:
                        print('reward:', cur_rewards / done_count,
                              'steps:', cur_steps / done_count)
                sum_game_res += game_res
                if batch_size // self.num_agents == 1 or games_played >= n_games:
                    print(f"games_player: {games_played}")
                    break
            # flatten (N, 1) indices for the next env_reset call
            done_indices = done_indices[:, 0]
    if self.record_elo:
        self._plot_elo_curve()
def _plot_elo_curve(self):
    """Plot Elo rating vs. checkpoint age (days) for both player pools and
    save the figure as ``elo.jpg`` next to the loaded checkpoint.

    Fix: ratings were previously written into ``np.arange`` integer arrays,
    which silently truncated the float Elo scores; float arrays are used now.
    """
    x = np.array(self.policy_timestep)
    x_op = np.array(self.policy_op_timestep)
    y = np.zeros(len(self.player_pool.players), dtype=np.float64)
    y_op = np.zeros(len(self.op_player_pool.players), dtype=np.float64)
    for player in self.player_pool.players:
        y[player.player_idx] = player.rating
    for player in self.op_player_pool.players:
        y_op[player.player_idx] = player.rating
    if self.params['load_path'] != self.params['op_load_path']:
        # two distinct policies: draw both curves
        plt.plot(x, y, 'b--', label='policy')
        plt.plot(x_op, y_op, 'r--', label='policy_op')
        plt.plot(x, y, 'b^-', x_op, y_op, 'ro-')
    else:
        # same checkpoint on both sides: a single curve suffices
        plt.plot(x, y, 'b--', label='policy')
        plt.plot(x, y, 'b^-')
    plt.title('ELO Curve')
    plt.xlabel('timestep/days')
    plt.ylabel('ElO')
    plt.legend()
    parent_path = os.path.dirname(self.params['load_path'])
    plt.savefig(os.path.join(parent_path, 'elo.jpg'))
def get_action(self, obs, is_determenistic=False, is_op=False):
    """Run inference through the (opponent) player pool and return actions.

    Returns the distribution mean when ``is_determenistic`` is set, otherwise
    the sampled action; rescales to the action bounds when clipping is on.
    """
    if not self.has_batch_dimension:
        obs = unsqueeze_obs(obs)
    obs = self._preproc_obs(obs)
    input_dict = {
        'is_train': False,
        'prev_actions': None,
        'obs': obs,
        'rnn_states': self.states
    }
    # opponents occupy num_actors * num_opponents env slots
    rows = self.num_actors * self.num_opponents if is_op else self.num_actors
    pool = self.op_player_pool if is_op else self.player_pool
    with torch.no_grad():
        # pre-allocated outputs are filled in place by the pool
        res_dict = {
            "actions": torch.zeros((rows, self.actions_num), device=self.device),
            "values": torch.zeros((rows, 1), device=self.device),
            "mus": torch.zeros((rows, self.actions_num), device=self.device)
        }
        pool.inference(input_dict, res_dict, obs)
    # self.states = res_dict['rnn_states']
    current_action = res_dict['mus'] if is_determenistic else res_dict['actions']
    if not self.has_batch_dimension:
        current_action = torch.squeeze(current_action.detach())
    if self.clip_actions:
        return rescale_actions(self.actions_low, self.actions_high, torch.clamp(current_action, -1.0, 1.0))
    return current_action
def _norm_policy_timestep(self):
    """Normalize checkpoint timestamps to days elapsed since the earliest one.

    Both lists are sorted ascending; every entry after the first becomes
    ``(t - t0) / (3600 * 24)`` and the first entry becomes 0.

    Fix: ``self.policy_timestep[0] = 0`` was previously unguarded and raised
    IndexError on an empty list, while the op list had a length check — both
    are now guarded symmetrically.
    """
    seconds_per_day = 3600 * 24
    self.policy_op_timestep.sort()
    self.policy_timestep.sort()
    for idx in range(1, len(self.policy_op_timestep)):
        self.policy_op_timestep[idx] -= self.policy_op_timestep[0]
        self.policy_op_timestep[idx] /= seconds_per_day
    for idx in range(1, len(self.policy_timestep)):
        self.policy_timestep[idx] -= self.policy_timestep[0]
        self.policy_timestep[idx] /= seconds_per_day
    if len(self.policy_timestep):
        self.policy_timestep[0] = 0
    if len(self.policy_op_timestep):
        self.policy_op_timestep[0] = 0
def env_reset(self, env, done_indices=None):
    """Reset the env (all envs, or just ``done_indices``) and split the
    stacked observations into main-policy and opponent halves."""
    stacked_obs = env.reset(done_indices)
    split_point = self.num_actors
    return {
        'obs_op': stacked_obs[split_point:],
        'obs': stacked_obs[:split_point],
    }
def env_step(self, env, actions):
    """Step the env with the stacked actions and split observations into
    main/opponent halves; converts numpy float64 obs down to float32 and
    normalizes scalar rewards/dones to 1-element arrays."""
    obs, rewards, dones, infos = env.step(actions)
    if hasattr(obs, 'dtype') and obs.dtype == np.float64:
        obs = np.float32(obs)
    split_point = self.num_actors
    obs_dict = {
        'obs_op': obs[split_point:],
        'obs': obs[:split_point],
    }
    if self.value_size > 1:
        # multi-head values: keep only the first reward component
        rewards = rewards[0]
    if self.is_tensor_obses:
        return self.obs_to_torch(obs_dict), rewards.cpu(), dones.cpu(), infos
    if np.isscalar(dones):
        rewards = np.expand_dims(np.asarray(rewards), 0)
        dones = np.expand_dims(np.asarray(dones), 0)
    return obs_dict, rewards, dones, infos
def create_model(self):
    """Build a fresh network from the base model config and move it to the
    player's device."""
    built = self.network.build(self.base_model_config)
    built.to(self.device)
    return built
def load_model(self, fn):
    """Instantiate a model and restore its weights (and input normalizer
    stats when present) from the checkpoint file ``fn``."""
    restored = self.create_model()
    checkpoint = torch_ext.safe_filesystem_op(torch.load, fn, map_location=self.device)
    restored.load_state_dict(checkpoint['model'])
    # running-mean-std stats are only saved when input normalization is on
    if self.normalize_input and 'running_mean_std' in checkpoint:
        restored.running_mean_std.load_state_dict(checkpoint['running_mean_std'])
    return restored
================================================
FILE: timechamber/learning/replay_buffer.py
================================================
# License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE]
import torch
class ReplayBuffer():
    """Fixed-capacity circular replay buffer holding a dict of tensors.

    Writes wrap around once the buffer is full; sampling walks a shuffled
    permutation of slots so repeated draws cover the buffer evenly.
    """

    def __init__(self, buffer_size, device):
        self._head = 0            # next write position
        self._total_count = 0     # total samples ever stored
        self._buffer_size = buffer_size
        self._device = device
        self._data_buf = None     # lazily allocated on first store()
        self._sample_idx = torch.randperm(buffer_size)  # shuffled read order
        self._sample_head = 0

    def reset(self):
        """Logically clear the buffer (storage tensors stay allocated)."""
        self._head = 0
        self._total_count = 0
        self._reset_sample_idx()

    def get_buffer_size(self):
        return self._buffer_size

    def get_total_count(self):
        return self._total_count

    def store(self, data_dict):
        """Append a batch of samples, wrapping at the end of the buffer.

        All values in ``data_dict`` must share the same leading batch size,
        which must be strictly smaller than the buffer capacity.
        """
        if self._data_buf is None:
            self._init_data_buf(data_dict)
        batch = next(iter(data_dict.values())).shape[0]
        capacity = self.get_buffer_size()
        assert (batch < capacity)
        for key, storage in self._data_buf.items():
            incoming = data_dict[key]
            assert (incoming.shape[0] == batch)
            first_part = min(batch, capacity - self._head)
            storage[self._head:(self._head + first_part)] = incoming[:first_part]
            leftover = batch - first_part
            if leftover > 0:
                # wrap the remainder to the start of the buffer
                storage[0:leftover] = incoming[first_part:]
        self._head = (self._head + batch) % capacity
        self._total_count += batch

    def sample(self, n):
        """Draw ``n`` entries following the current shuffled slot order."""
        capacity = self.get_buffer_size()
        positions = torch.arange(self._sample_head, self._sample_head + n) % capacity
        chosen = self._sample_idx[positions]
        if self.get_total_count() < capacity:
            # buffer not yet full: restrict indices to the written prefix
            chosen = chosen % self._head
        samples = {key: storage[chosen] for key, storage in self._data_buf.items()}
        self._sample_head += n
        if self._sample_head >= capacity:
            self._reset_sample_idx()
        return samples

    def _reset_sample_idx(self):
        """Reshuffle the slot permutation and rewind the sampling cursor."""
        self._sample_idx[:] = torch.randperm(self.get_buffer_size())
        self._sample_head = 0

    def _init_data_buf(self, data_dict):
        """Allocate zeroed storage matching each entry's trailing shape."""
        capacity = self.get_buffer_size()
        self._data_buf = {
            key: torch.zeros((capacity,) + value.shape[1:], device=self._device)
            for key, value in data_dict.items()
        }
================================================
FILE: timechamber/learning/vectorized_models.py
================================================
import torch
import torch.nn as nn
from rl_games.algos_torch.running_mean_std import RunningMeanStd, RunningMeanStdObs
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch.models import ModelA2CContinuousLogStd
class VectorizedRunningMeanStd(RunningMeanStd):
    """Running mean/std normalizer with an extra leading population dimension.

    Keeps independent running statistics per population member so one module
    can normalize inputs of shape (population_size, batch_size, *insize).
    """

    def __init__(self, insize, population_size, epsilon=1e-05, per_channel=False, norm_only=False, is_training=False):
        # input shape: population_size*batch_size*(insize)
        # NOTE: the parent is initialized with population_size in its insize
        # slot; its buffers are then shadowed by the vectorized ones
        # registered below.
        super(VectorizedRunningMeanStd, self).__init__(population_size, epsilon, per_channel, norm_only)
        self.insize = insize
        self.epsilon = epsilon
        self.population_size = population_size
        self.training = is_training
        self.norm_only = norm_only
        self.per_channel = per_channel
        if per_channel:
            # reduce over batch (axis 1) and spatial axes; axis 0 is the population dim
            if len(self.insize) == 3:
                self.axis = [1, 3, 4]
            if len(self.insize) == 2:
                self.axis = [1, 3]
            if len(self.insize) == 1:
                self.axis = [1]
            # NOTE(review): insize[1] is out of range for the 1-D case above and
            # yields a scalar that breaks the `*in_size` unpacking below —
            # confirm before enabling per_channel=True.
            in_size = self.insize[1]
        else:
            self.axis = [1]
            in_size = insize
        # print(in_size)
        # one row of running statistics per population member
        self.register_buffer("running_mean", torch.zeros((population_size, *in_size), dtype=torch.float32))
        self.register_buffer("running_var", torch.ones((population_size, *in_size), dtype=torch.float32))
        self.register_buffer("count", torch.ones((population_size, 1), dtype=torch.float32))

    def _update_mean_var_count_from_moments(self, mean, var, count, batch_mean, batch_var, batch_count):
        # Parallel (Chan et al.) update of running moments, applied elementwise
        # per population member.
        delta = batch_mean - mean
        tot_count = count + batch_count
        new_mean = mean + delta * batch_count / tot_count
        m_a = var * count
        m_b = batch_var * batch_count
        M2 = m_a + m_b + delta ** 2 * count * batch_count / tot_count
        new_var = M2 / tot_count
        new_count = tot_count
        return new_mean, new_var, new_count

    def forward(self, input, unnorm=False, mask=None):
        """Normalize (or de-normalize when ``unnorm``) per population member.

        In training mode the running statistics are updated first using the
        batch moments along ``self.axis`` (the batch dimension is size[1]).
        """
        if self.training:
            if mask is not None:
                mean, var = torch_ext.get_mean_std_with_masks(input, mask)
            else:
                mean = input.mean(self.axis)  # along channel axis
                var = input.var(self.axis)
            self.running_mean, self.running_var, self.count = self._update_mean_var_count_from_moments(
                self.running_mean, self.running_var, self.count,
                mean, var, input.size()[1])
        # change shape
        if self.per_channel:
            if len(self.insize) == 3:
                current_mean = self.running_mean.view([self.population_size, 1, self.insize[0], 1, 1]).expand_as(input)
                current_var = self.running_var.view([self.population_size, 1, self.insize[0], 1, 1]).expand_as(input)
            if len(self.insize) == 2:
                current_mean = self.running_mean.view([self.population_size, 1, self.insize[0], 1]).expand_as(input)
                current_var = self.running_var.view([self.population_size, 1, self.insize[0], 1]).expand_as(input)
            if len(self.insize) == 1:
                current_mean = self.running_mean.view([self.population_size, 1, self.insize[0]]).expand_as(input)
                current_var = self.running_var.view([self.population_size, 1, self.insize[0]]).expand_as(input)
        else:
            current_mean = self.running_mean
            current_var = self.running_var
        # get output
        if unnorm:
            # inverse transform: clamp the normalized value, then rescale;
            # unsqueeze(1) broadcasts the per-member stats over the batch dim
            y = torch.clamp(input, min=-5.0, max=5.0)
            y = torch.sqrt(torch.unsqueeze(current_var.float(), 1) + self.epsilon) * y + torch.unsqueeze(
                current_mean.float(), 1)
        else:
            if self.norm_only:
                y = input / torch.sqrt(current_var.float() + self.epsilon)
            else:
                y = (input - torch.unsqueeze(current_mean.float(), 1)) / torch.sqrt(
                    torch.unsqueeze(current_var.float(), 1) + self.epsilon)
                y = torch.clamp(y, min=-5.0, max=5.0)
        return y
class ModelVectorizedA2C(ModelA2CContinuousLogStd):
    """Continuous A2C model whose network carries one parameter slice per
    population member (used by the vectorized self-play opponent pool)."""

    def __init__(self, network):
        super().__init__(network)
        return

    def build(self, config):
        """Build the vectorized network plus population-aware normalizers.

        ``config`` must contain 'input_shape' and 'population_size';
        'normalize_value', 'normalize_input' and 'value_size' are optional.
        """
        net = self.network_builder.build('vectorized_a2c', **config)
        for name, _ in net.named_parameters():
            print(name)

        obs_shape = config['input_shape']
        population_size = config['population_size']
        normalize_value = config.get('normalize_value', False)
        normalize_input = config.get('normalize_input', False)
        value_size = config.get('value_size', 1)
        return self.Network(net, population_size, obs_shape=obs_shape,
                            normalize_value=normalize_value, normalize_input=normalize_input, value_size=value_size, )

    class Network(ModelA2CContinuousLogStd.Network):
        def __init__(self, a2c_network, population_size, obs_shape, normalize_value, normalize_input, value_size):
            self.population_size = population_size
            super().__init__(a2c_network, obs_shape=obs_shape,
                             normalize_value=normalize_value, normalize_input=normalize_input, value_size=value_size)
            # replace the parent's normalizers with population-aware versions
            if normalize_value:
                self.value_mean_std = VectorizedRunningMeanStd((self.value_size,), self.population_size)
            if normalize_input:
                if isinstance(obs_shape, dict):
                    self.running_mean_std = RunningMeanStdObs(obs_shape)
                else:
                    self.running_mean_std = VectorizedRunningMeanStd(obs_shape, self.population_size)

        def update(self, population_idx, network):
            """Copy ``network``'s weights into population slot ``population_idx``.

            ``state_dict()`` returns detached tensors that share storage with
            the live parameters, so in-place writes update the real weights.

            Fixes: the equal-rank branch previously assigned into the throwaway
            dict returned by ``state_dict()`` — a no-op; it now copies the
            tensor in place (when the shapes actually match). The source
            state_dict is also fetched once instead of rebuilt for every key.
            """
            dst_state = self.state_dict()
            src_state = network.state_dict()
            for key, param1 in dst_state.items():
                param2 = src_state[key]
                if len(param1.shape) == len(param2.shape):
                    # shared (non-vectorized) tensor: overwrite it in place
                    if param1.shape == param2.shape:
                        param1.copy_(param2)
                elif len(param2.shape) == 1:
                    # bias / fixed-sigma vector goes into this member's slice
                    if len(param1.shape) == 3:
                        param1[population_idx] = torch.unsqueeze(param2, dim=0)
                    else:
                        param1[population_idx] = param2
                elif len(param2.shape) == 2:
                    # nn.Linear stores (out, in); the vectorized layer uses (in, out)
                    param1[population_idx] = torch.transpose(param2, 0, 1)
================================================
FILE: timechamber/learning/vectorized_network_builder.py
================================================
import torch
import torch.nn as nn
import math
from rl_games.algos_torch import network_builder
class VectorizedLinearLayer(torch.nn.Module):
    """Vectorized version of torch.nn.Linear.

    Holds one (in_features, out_features) weight matrix and one bias row per
    population member and applies them with a single batched matmul:
    input (population_size, batch, in_features) ->
    output (population_size, batch, out_features).
    """

    def __init__(
            self,
            population_size: int,
            in_features: int,
            out_features: int,
            use_layer_norm: bool = False,
    ):
        super().__init__()
        self._population_size = population_size
        self._in_features = in_features
        self._out_features = out_features

        self.weight = torch.nn.Parameter(
            torch.empty(self._population_size, self._in_features, self._out_features),
            requires_grad=True,
        )
        self.bias = torch.nn.Parameter(
            torch.empty(self._population_size, 1, self._out_features),
            requires_grad=True,
        )
        # initialize each member's slice independently, mirroring nn.Linear
        for member_id in range(population_size):
            torch.nn.init.kaiming_uniform_(self.weight[member_id], a=math.sqrt(5))
        fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight[0])
        bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
        torch.nn.init.uniform_(self.bias, -bound, bound)

        # Fix: LayerNorm's second positional argument is eps, not a shape —
        # the population size was previously being passed as eps.
        self._layer_norm = (
            torch.nn.LayerNorm(self._out_features)
            if use_layer_norm
            else None
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply each member's affine map to its slice of the input batch."""
        assert x.shape[0] == self._population_size
        out = x.matmul(self.weight) + self.bias
        if self._layer_norm is not None:
            return self._layer_norm(out)
        return out
class VectorizedA2CBuilder(network_builder.A2CBuilder):
    """A2C network builder whose MLP trunk and output heads are replaced by
    population-vectorized layers (one parameter slice per population member)."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        return

    class Network(network_builder.A2CBuilder.Network):
        def __init__(self, params, **kwargs):
            # population_size is supplied through the model build config
            self.population_size = kwargs.get('population_size')
            super().__init__(params, **kwargs)
            # Re-create the heads built by the parent as vectorized layers so
            # every population member owns independent value/mu(/sigma) weights.
            self.value = VectorizedLinearLayer(population_size=self.population_size,
                                               in_features=self.units[-1],
                                               out_features=self.value_size)
            actions_num = kwargs.get('actions_num')
            self.mu = VectorizedLinearLayer(self.population_size, self.units[-1], actions_num)
            if self.fixed_sigma:
                # state-independent log-std: one (1, actions_num) row per member
                self.sigma = nn.Parameter(
                    torch.zeros((self.population_size, 1, actions_num), requires_grad=True, dtype=torch.float32),
                    requires_grad=True)
            else:
                self.sigma = VectorizedLinearLayer(self.population_size, self.units[-1], actions_num)

        def _build_vectorized_mlp(self,
                                  input_size,
                                  units,
                                  activation,
                                  norm_func_name=None):
            """Stack VectorizedLinearLayer + activation pairs for the unit sizes."""
            print(f'build vectorized mlp:{self.population_size}x{input_size}')
            in_size = input_size
            layers = []
            for unit in units:
                layers.append(
                    VectorizedLinearLayer(self.population_size, in_size, unit, norm_func_name == 'layer_norm'))
                layers.append(self.activations_factory.create(activation))
                in_size = unit
            return nn.Sequential(*layers)

        def _build_mlp(self,
                       input_size,
                       units,
                       activation,
                       dense_func,
                       norm_only_first_layer=False,
                       norm_func_name=None,
                       d2rl=False):
            # Override the parent's hook so the trunk created in super().__init__
            # is vectorized too; dense_func/norm_only_first_layer/d2rl are ignored.
            return self._build_vectorized_mlp(input_size, units, activation, norm_func_name=norm_func_name)

        def forward(self, obs_dict):  # implement continues situation
            # obs is expected as (population_size, batch, obs_dim) — the
            # vectorized layers assert on the leading population dimension;
            # RNN states are passed through untouched.
            obs = obs_dict['obs']
            states = obs_dict.get('rnn_states', None)
            out = self.actor_mlp(obs)
            value = self.value_act(self.value(out))
            mu = self.mu_act(self.mu(out))
            if self.fixed_sigma:
                sigma = self.sigma_act(self.sigma)
            else:
                sigma = self.sigma_act(self.sigma(out))
            # mu * 0 + sigma broadcasts a fixed sigma to mu's shape
            return mu, mu * 0 + sigma, value, states

    def load(self, params):
        super().load(params)

    def build(self, name, **kwargs):
        net = VectorizedA2CBuilder.Network(self.params, **kwargs)
        return net
================================================
FILE: timechamber/models/Humanoid_Strike/policy.pth
================================================
[File too large to display: 19.5 MB]
================================================
FILE: timechamber/models/Humanoid_Strike/policy_op.pth
================================================
[File too large to display: 19.5 MB]
================================================
FILE: timechamber/tasks/__init__.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Multi-agent competitive Isaac Gym task classes
from .ma_ant_sumo import MA_Ant_Sumo
from .ma_ant_battle import MA_Ant_Battle
from .ma_humanoid_strike import HumanoidStrike

# Mappings from strings to environments
# Keys must match the task names used by the configs in timechamber/cfg/task/.
isaacgym_task_map = {
    "MA_Ant_Sumo": MA_Ant_Sumo,
    "MA_Ant_Battle": MA_Ant_Battle,
    "MA_Humanoid_Strike": HumanoidStrike
}
================================================
FILE: timechamber/tasks/ase_humanoid_base/base_task.py
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import sys
import os
import operator
from copy import deepcopy
import random
from isaacgym import gymapi
from isaacgym.gymutil import get_property_setter_map, get_property_getter_map, get_default_setter_args, apply_random_samples, check_buckets, generate_random_samples
import numpy as np
import torch
# Base class for RL tasks
class BaseTask():
def __init__(self, cfg, enable_camera_sensors=False):
self.gym = gymapi.acquire_gym()
self.device_type = cfg.get("device_type", "cuda")
self.device_id = cfg.get("device_id", 0)
self.device = "cpu"
if self.device_type == "cuda" or self.device_type == "GPU":
self.device = "cuda" + ":" + str(self.device_id)
self.headless = cfg["headless"]
self.num_agents = cfg["env"].get("numAgents", 1) # used for multi-agent environments
# double check!
self.graphics_device_id = self.device_id
if enable_camera_sensors == False and self.headless == True:
self.graphics_device_id = -1
self.num_envs = cfg["env"]["numEnvs"]
self.num_obs = cfg["env"]["numObservations"]
self.num_states = cfg["env"].get("numStates", 0)
self.num_actions = cfg["env"]["numActions"]
self.control_freq_inv = cfg["env"].get("controlFrequencyInv", 1)
# optimization flags for pytorch JIT
torch._C._jit_set_profiling_mode(False)
torch._C._jit_set_profiling_executor(False)
# allocate buffers
self.obs_buf = torch.zeros(
(self.num_envs, self.num_obs), device=self.device, dtype=torch.float)
self.states_buf = torch.zeros(
(self.num_envs, self.num_states), device=self.device, dtype=torch.float)
self.rew_buf = torch.zeros(
self.num_envs, device=self.device, dtype=torch.float)
self.reset_buf = torch.ones(
self.num_envs, device=self.device, dtype=torch.long)
self.progress_buf = torch.zeros(
self.num_envs, device=self.device, dtype=torch.long)
self.randomize_buf = torch.zeros(
self.num_envs, device=self.device, dtype=torch.long)
self.extras = {}
self.original_props = {}
self.dr_randomizations = {}
self.first_randomization = True
self.actor_params_generator = None
self.extern_actor_params = {}
for env_id in range(self.num_envs):
self.extern_actor_params[env_id] = None
self.last_step = -1
self.last_rand_step = -1
# create envs, sim and viewer
self.create_sim()
self.gym.prepare_sim(self.sim)
# todo: read from config
self.enable_viewer_sync = True
self.viewer = None
# if running with a viewer, set up keyboard shortcuts and camera
if self.headless == False:
# subscribe to keyboard shortcuts
self.viewer = self.gym.create_viewer(
self.sim, gymapi.CameraProperties())
self.gym.subscribe_viewer_keyboard_event(
self.viewer, gymapi.KEY_ESCAPE, "QUIT")
self.gym.subscribe_viewer_keyboard_event(
self.viewer, gymapi.KEY_V, "toggle_viewer_sync")
# set the camera position based on up axis
sim_params = self.gym.get_sim_params(self.sim)
if sim_params.up_axis == gymapi.UP_AXIS_Z:
cam_pos = gymapi.Vec3(20.0, 25.0, 3.0)
cam_target = gymapi.Vec3(10.0, 15.0, 0.0)
else:
cam_pos = gymapi.Vec3(20.0, 3.0, 25.0)
cam_target = gymapi.Vec3(10.0, 0.0, 15.0)
self.gym.viewer_camera_look_at(
self.viewer, None, cam_pos, cam_target)
# set gravity based on up axis and return axis index
def set_sim_params_up_axis(self, sim_params, axis):
if axis == 'z':
sim_params.up_axis = gymapi.UP_AXIS_Z
sim_params.gravity.x = 0
sim_params.gravity.y = 0
sim_params.gravity.z = -9.81
return 2
return 1
def create_sim(self, compute_device, graphics_device, physics_engine, sim_params):
sim = self.gym.create_sim(compute_device, graphics_device, physics_engine, sim_params)
if sim is None:
print("*** Failed to create sim")
quit()
return sim
def step(self, actions):
if self.dr_randomizations.get('actions', None):
actions = self.dr_randomizations['actions']['noise_lambda'](actions)
# apply actions
self.pre_physics_step(actions)
# step physics and render each frame
self._physics_step()
# to fix!
if self.device == 'cpu':
self.gym.fetch_results(self.sim, True)
# compute observations, rewards, resets, ...
self.post_physics_step()
if self.dr_randomizations.get('observations', None):
self.obs_buf = self.dr_randomizations['observations']['noise_lambda'](self.obs_buf)
def get_states(self):
return self.states_buf
def render(self, sync_frame_time=False):
if self.viewer:
# check for window closed
if self.gym.query_viewer_has_closed(self.viewer):
sys.exit()
# check for keyboard events
for evt in self.gym.query_viewer_action_events(self.viewer):
if evt.action == "QUIT" and evt.value > 0:
sys.exit()
elif evt.action == "toggle_viewer_sync" and evt.value > 0:
self.enable_viewer_sync = not self.enable_viewer_sync
# fetch results
if self.device != 'cpu':
self.gym.fetch_results(self.sim, True)
# step graphics
if self.enable_viewer_sync:
self.gym.step_graphics(self.sim)
self.gym.draw_viewer(self.viewer, self.sim, True)
else:
self.gym.poll_viewer_events(self.viewer)
def get_actor_params_info(self, dr_params, env):
"""Returns a flat array of actor params, their names and ranges."""
if "actor_params" not in dr_params:
return None
params = []
names = []
lows = []
highs = []
param_getters_map = get_property_getter_map(self.gym)
for actor, actor_properties in dr_params["actor_params"].items():
handle = self.gym.find_actor_handle(env, actor)
for prop_name, prop_attrs in actor_properties.items():
if prop_name == 'color':
continue # this is set randomly
props = param_getters_map[prop_name](env, handle)
if not isinstance(props, list):
props = [props]
for prop_idx, prop in enumerate(props):
for attr, attr_randomization_params in prop_attrs.items():
name = prop_name+'_'+str(prop_idx)+'_'+attr
lo_hi = attr_randomization_params['range']
distr = attr_randomization_params['distribution']
if 'uniform' not in distr:
lo_hi = (-1.0*float('Inf'), float('Inf'))
if isinstance(prop, np.ndarray):
for attr_idx in range(prop[attr].shape[0]):
params.append(prop[attr][attr_idx])
names.append(name+'_'+str(attr_idx))
lows.append(lo_hi[0])
highs.append(lo_hi[1])
else:
params.append(getattr(prop, attr))
names.append(name)
lows.append(lo_hi[0])
highs.append(lo_hi[1])
return params, names, lows, highs
# Apply randomizations only on resets, due to current PhysX limitations
def apply_randomizations(self, dr_params):
# If we don't have a randomization frequency, randomize every step
rand_freq = dr_params.get("frequency", 1)
# First, determine what to randomize:
# - non-environment parameters when > frequency steps have passed since the last non-environment
# - physical environments in the reset buffer, which have exceeded the randomization frequency threshold
# - on the first call, randomize everything
self.last_step = self.gym.get_frame_count(self.sim)
if self.first_randomization:
do_nonenv_randomize = True
env_ids = list(range(self.num_envs))
else:
do_nonenv_randomize = (self.last_step - self.last_rand_step) >= rand_freq
rand_envs = torch.where(self.randomize_buf >= rand_freq, torch.ones_like(self.randomize_buf), torch.zeros_like(self.randomize_buf))
rand_envs = torch.logical_and(rand_envs, self.reset_buf)
env_ids = torch.nonzero(rand_envs, as_tuple=False).squeeze(-1).tolist()
self.randomize_buf[rand_envs] = 0
if do_nonenv_randomize:
self.last_rand_step = self.last_step
param_setters_map = get_property_setter_map(self.gym)
param_setter_defaults_map = get_default_setter_args(self.gym)
param_getters_map = get_property_getter_map(self.gym)
# On first iteration, check the number of buckets
if self.first_randomization:
check_buckets(self.gym, self.envs, dr_params)
for nonphysical_param in ["observations", "actions"]:
if nonphysical_param in dr_params and do_nonenv_randomize:
dist = dr_params[nonphysical_param]["distribution"]
op_type = dr_params[nonphysical_param]["operation"]
sched_type = dr_params[nonphysical_param]["schedule"] if "schedule" in dr_params[nonphysical_param] else None
sched_step = dr_params[nonphysical_param]["schedule_steps"] if "schedule" in dr_params[nonphysical_param] else None
op = operator.add if op_type == 'additive' else operator.mul
if sched_type == 'linear':
sched_scaling = 1.0 / sched_step * \
min(self.last_step, sched_step)
elif sched_type == 'constant':
sched_scaling = 0 if self.last_step < sched_step else 1
else:
sched_scaling = 1
if dist == 'gaussian':
mu, var = dr_params[nonphysical_param]["range"]
mu_corr, var_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])
if op_type == 'additive':
mu *= sched_scaling
var *= sched_scaling
mu_corr *= sched_scaling
var_corr *= sched_scaling
elif op_type == 'scaling':
var = var * sched_scaling # scale up var over time
mu = mu * sched_scaling + 1.0 * \
(1.0 - sched_scaling) # linearly interpolate
var_corr = var_corr * sched_scaling # scale up var over time
mu_corr = mu_corr * sched_scaling + 1.0 * \
(1.0 - sched_scaling) # linearly interpolate
def noise_lambda(tensor, param_name=nonphysical_param):
params = self.dr_randomizations[param_name]
corr = params.get('corr', None)
if corr is None:
corr = torch.randn_like(tensor)
params['corr'] = corr
corr = corr * params['var_corr'] + params['mu_corr']
return op(
tensor, corr + torch.randn_like(tensor) * params['var'] + params['mu'])
self.dr_randomizations[nonphysical_param] = {'mu': mu, 'var': var, 'mu_corr': mu_corr, 'var_corr': var_corr, 'noise_lambda': noise_lambda}
elif dist == 'uniform':
lo, hi = dr_params[nonphysical_param]["range"]
lo_corr, hi_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])
if op_type == 'additive':
lo *= sched_scaling
hi *= sched_scaling
lo_corr *= sched_scaling
hi_corr *= sched_scaling
elif op_type == 'scaling':
lo = lo * sched_scaling + 1.0 * (1.0 - sched_scaling)
hi = hi * sched_scaling + 1.0 * (1.0 - sched_scaling)
lo_corr = lo_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)
hi_corr = hi_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)
def noise_lambda(tensor, param_name=nonphysical_param):
params = self.dr_randomizations[param_name]
corr = params.get('corr', None)
if corr is None:
corr = torch.randn_like(tensor)
params['corr'] = corr
corr = corr * (params['hi_corr'] - params['lo_corr']) + params['lo_corr']
return op(tensor, corr + torch.rand_like(tensor) * (params['hi'] - params['lo']) + params['lo'])
self.dr_randomizations[nonphysical_param] = {'lo': lo, 'hi': hi, 'lo_corr': lo_corr, 'hi_corr': hi_corr, 'noise_lambda': noise_lambda}
if "sim_params" in dr_params and do_nonenv_randomize:
prop_attrs = dr_params["sim_params"]
prop = self.gym.get_sim_params(self.sim)
if self.first_randomization:
self.original_props["sim_params"] = {
attr: getattr(prop, attr) for attr in dir(prop)}
for attr, attr_randomization_params in prop_attrs.items():
apply_random_samples(
prop, self.original_props["sim_params"], attr, attr_randomization_params, self.last_step)
self.gym.set_sim_params(self.sim, prop)
# If self.actor_params_generator is initialized: use it to
# sample actor simulation params. This gives users the
# freedom to generate samples from arbitrary distributions,
# e.g. use full-covariance distributions instead of the DR's
# default of treating each simulation parameter independently.
extern_offsets = {}
if self.actor_params_generator is not None:
for env_id in env_ids:
self.extern_actor_params[env_id] = \
self.actor_params_generator.sample()
extern_offsets[env_id] = 0
for actor, actor_properties in dr_params["actor_params"].items():
for env_id in env_ids:
env = self.envs[env_id]
handle = self.gym.find_actor_handle(env, actor)
extern_sample = self.extern_actor_params[env_id]
for prop_name, prop_attrs in actor_properties.items():
if prop_name == 'color':
num_bodies = self.gym.get_actor_rigid_body_count(
env, handle)
for n in range(num_bodies):
self.gym.set_rigid_body_color(env, handle, n, gymapi.MESH_VISUAL,
gymapi.Vec3(random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1)))
continue
if prop_name == 'scale':
attr_randomization_params = prop_attrs
sample = generate_random_samples(attr_randomization_params, 1,
self.last_step, None)
og_scale = 1
if attr_randomization_params['operation'] == 'scaling':
new_scale = og_scale * sample
elif attr_randomization_params['operation'] == 'additive':
new_scale = og_scale + sample
self.gym.set_actor_scale(env, handle, new_scale)
continue
prop = param_getters_map[prop_name](env, handle)
if isinstance(prop, list):
if self.first_randomization:
self.original_props[prop_name] = [
{attr: getattr(p, attr) for attr in dir(p)} for p in prop]
for p, og_p in zip(prop, self.original_props[prop_name]):
for attr, attr_randomization_params in prop_attrs.items():
smpl = None
if self.actor_params_generator is not None:
smpl, extern_offsets[env_id] = get_attr_val_from_sample(
extern_sample, extern_offsets[env_id], p, attr)
apply_random_samples(
p, og_p, attr, attr_randomization_params,
self.last_step, smpl)
else:
if self.first_randomization:
self.original_props[prop_name] = deepcopy(prop)
for attr, attr_randomization_params in prop_attrs.items():
smpl = None
if self.actor_params_generator is not None:
smpl, extern_offsets[env_id] = get_attr_val_from_sample(
extern_sample, extern_offsets[env_id], prop, attr)
apply_random_samples(
prop, self.original_props[prop_name], attr,
attr_randomization_params, self.last_step, smpl)
setter = param_setters_map[prop_name]
default_args = param_setter_defaults_map[prop_name]
setter(env, handle, prop, *default_args)
if self.actor_params_generator is not None:
for env_id in env_ids: # check that we used all dims in sample
if extern_offsets[env_id] > 0:
extern_sample = self.extern_actor_params[env_id]
if extern_offsets[env_id] != extern_sample.shape[0]:
print('env_id', env_id,
'extern_offset', extern_offsets[env_id],
'vs extern_sample.shape', extern_sample.shape)
raise Exception("Invalid extern_sample size")
self.first_randomization = False
def pre_physics_step(self, actions):
    """Apply per-step actions to the sim before stepping physics; subclasses must override."""
    raise NotImplementedError
def _physics_step(self):
for i in range(self.control_freq_inv):
self.render()
self.gym.simulate(self.sim)
return
def post_physics_step(self):
    """Update buffers/observations after physics has stepped; subclasses must override."""
    raise NotImplementedError
def get_attr_val_from_sample(sample, offset, prop, attr):
    """Slice the value for (prop, attr) out of a flat randomization sample.

    Returns a ``(value, new_offset)`` pair; ``(None, 0)`` when no sample
    vector was provided.
    """
    if sample is None:
        return None, 0
    if not isinstance(prop, np.ndarray):
        # Scalar attribute: consume exactly one entry.
        return sample[offset], offset + 1
    # Structured-array attribute: consume one entry per element of the field.
    width = prop[attr].shape[0]
    return sample[offset:offset + width], offset + width
================================================
FILE: timechamber/tasks/ase_humanoid_base/humanoid.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import os
import torch
from isaacgym import gymtorch
from isaacgym import gymapi
from isaacgym.torch_utils import *
from timechamber.utils import torch_utils
from timechamber.utils.utils import print_actor_info, print_asset_info
from timechamber.tasks.ase_humanoid_base.base_task import BaseTask
class Humanoid(BaseTask):
    """Two-humanoid Isaac Gym task base: each env holds an ego actor and an
    opponent ("op") actor, with paired state-tensor views for both."""

    def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
        """Read config, create the sim via BaseTask, and wrap the Gym state
        tensors into per-agent views (ego and opponent slices)."""
        self.cfg = cfg
        self.sim_params = sim_params
        self.physics_engine = physics_engine
        ##
        # Half-extent/radius of the playable arena; also used for spawn placement.
        self.borderline_space = self.cfg["env"]["borderlineSpace"]
        self.num_agents = self.cfg["env"].get("numAgents", 1)
        self._pd_control = self.cfg["env"]["pdControl"]
        self.power_scale = self.cfg["env"]["powerScale"]
        self.debug_viz = self.cfg["env"]["enableDebugVis"]
        self.plane_static_friction = self.cfg["env"]["plane"]["staticFriction"]
        self.plane_dynamic_friction = self.cfg["env"]["plane"]["dynamicFriction"]
        self.plane_restitution = self.cfg["env"]["plane"]["restitution"]
        self.max_episode_length = self.cfg["env"]["episodeLength"]
        self._local_root_obs = self.cfg["env"]["localRootObs"]
        self._root_height_obs = self.cfg["env"].get("rootHeightObs", True)
        self._enable_early_termination = self.cfg["env"]["enableEarlyTermination"]
        key_bodies = self.cfg["env"]["keyBodies"]
        self._setup_character_props(key_bodies)
        # Obs/action sizes depend on the character asset chosen above.
        self.cfg["env"]["numObservations"] = self.get_obs_size()
        self.cfg["env"]["numActions"] = self.get_action_size()
        self.cfg["device_type"] = device_type
        self.cfg["device_id"] = device_id
        self.cfg["headless"] = headless
        # BaseTask creates self.gym / self.sim / envs and sets control_freq_inv.
        super().__init__(cfg=self.cfg)
        self.dt = self.control_freq_inv * sim_params.dt
        # get gym GPU state tensors
        actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim)
        dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim)
        # print(f"dof_state_tensor shape: {dof_state_tensor.shape}")
        sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim)
        rigid_body_state = self.gym.acquire_rigid_body_state_tensor(self.sim)
        contact_force_tensor = self.gym.acquire_net_contact_force_tensor(self.sim)
        # Two force sensors per humanoid (one per foot), 6 values each.
        sensors_per_env = 2
        self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs * self.num_agents, sensors_per_env * 6)
        dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim)
        self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs * self.num_agents, self.num_dof)
        self.gym.refresh_dof_state_tensor(self.sim)
        self.gym.refresh_actor_root_state_tensor(self.sim)
        self.gym.refresh_rigid_body_state_tensor(self.sim)
        self.gym.refresh_net_contact_force_tensor(self.sim)
        self._root_states = gymtorch.wrap_tensor(actor_root_state)
        # print(f'root_states:{self._root_states.shape}')
        num_actors = self.get_num_actors_per_env()
        # print(f"num actors: {num_actors}")
        self._humanoid_root_states = self._root_states
        # print(f"humanoid_root_states shape: {self._humanoid_root_states.shape}")  # (num_envs*2, 13)
        # Snapshot of spawn root states used for resets; velocities zeroed.
        self._initial_humanoid_root_states = self._humanoid_root_states.clone()
        self._initial_humanoid_root_states[:, 7:13] = 0  # zero for linear vel and angular vel
        self._humanoid_actor_ids = num_actors * torch.arange(self.num_envs, device=self.device, dtype=torch.int32)
        # print(f"humanoid_actor_ids: {self._humanoid_actor_ids}")  # 0, 2, 4, 6...
        # print(f"humanoid indices: {self.humanoid_indices}")  # 0, 2, 4, 6...
        # print(f"humanooid op indices: {self.humanoid_indices_op}")  # 1, 3, 5, 7...
        # create some wrapper tensors for different slices
        # Each env's DOF rows hold the ego agent first, then the opponent;
        # slicing at num_dof splits the two.
        self._dof_state = gymtorch.wrap_tensor(dof_state_tensor)
        dofs_per_env = self._dof_state.shape[0] // self.num_envs
        self._dof_pos = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., :self.num_dof, 0]
        self._dof_vel = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., :self.num_dof, 1]
        # op
        self._dof_pos_op = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., self.num_dof:, 0]
        self._dof_vel_op = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., self.num_dof:, 1]
        self._initial_dof_pos = torch.zeros_like(self._dof_pos, device=self.device, dtype=torch.float)
        self._initial_dof_vel = torch.zeros_like(self._dof_vel, device=self.device, dtype=torch.float)
        # op
        self._initial_dof_pos_op = torch.zeros_like(self._dof_pos, device=self.device, dtype=torch.float)
        self._initial_dof_vel_op = torch.zeros_like(self._dof_vel, device=self.device, dtype=torch.float)
        # Rigid-body state is likewise split: first num_bodies rows per env
        # belong to the ego humanoid, the rest to the opponent.
        self._rigid_body_state = gymtorch.wrap_tensor(rigid_body_state)
        bodies_per_env = self._rigid_body_state.shape[0] // self.num_envs
        rigid_body_state_reshaped = self._rigid_body_state.view(self.num_envs, bodies_per_env, 13)
        self._rigid_body_pos = rigid_body_state_reshaped[..., :self.num_bodies, 0:3]
        self._rigid_body_rot = rigid_body_state_reshaped[..., :self.num_bodies, 3:7]
        self._rigid_body_vel = rigid_body_state_reshaped[..., :self.num_bodies, 7:10]
        self._rigid_body_ang_vel = rigid_body_state_reshaped[..., :self.num_bodies, 10:13]
        # op
        self._rigid_body_pos_op = rigid_body_state_reshaped[..., self.num_bodies:, 0:3]
        self._rigid_body_rot_op = rigid_body_state_reshaped[..., self.num_bodies:, 3:7]
        self._rigid_body_vel_op = rigid_body_state_reshaped[..., self.num_bodies:, 7:10]
        self._rigid_body_ang_vel_op = rigid_body_state_reshaped[..., self.num_bodies:, 10:13]
        contact_force_tensor = gymtorch.wrap_tensor(contact_force_tensor)
        self._contact_forces = contact_force_tensor.view(self.num_envs, bodies_per_env, 3)[..., :self.num_bodies, :]
        self._contact_forces_op = contact_force_tensor.view(self.num_envs, bodies_per_env, 3)[..., self.num_bodies:, :]
        self._terminate_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long)
        self._build_termination_heights()
        contact_bodies = self.cfg["env"]["contactBodies"]
        self._key_body_ids = self._build_key_body_ids_tensor(key_bodies)
        self._contact_body_ids = self._build_contact_body_ids_tensor(contact_bodies)
        self.allocate_buffers()
        return
def get_obs_size(self):
    """Number of observation values per agent."""
    return self._num_obs
def get_action_size(self):
    """Number of action values per agent."""
    return self._num_actions
def get_num_actors_per_env(self):
    """Actors per env, derived from the size of the global root-state tensor."""
    total_actors = self._root_states.shape[0]
    return total_actors // self.num_envs
def _add_circle_borderline(self, env):
lines = []
borderline_height = 0.01
for height in range(20):
for angle in range(360):
begin_point = [np.cos(np.radians(angle)), np.sin(np.radians(angle)), borderline_height * height]
end_point = [np.cos(np.radians(angle + 1)), np.sin(np.radians(angle + 1)), borderline_height * height]
lines.append(begin_point)
lines.append(end_point)
lines = np.array(lines, dtype=np.float32) * self.borderline_space
colors = np.array([[1, 0, 0]] * int(len(lines) / 2), dtype=np.float32)
self.gym.add_lines(self.viewer, env, int(len(lines) / 2), lines, colors)
def _add_rectangle_borderline(self, env):
lines = []
colors = np.zeros((90*60, 3), dtype=np.float32)
for k in range(4):
for height in range(10):
lines1 = []
lines2 = []
lines3 = []
lines4 = []
for i in range(90):
begin_point1 = [-self.borderline_space + i * self.borderline_space / 45,
self.borderline_space,
height*0.01+ k*0.25]
end_point1 = [-self.borderline_space + (i+1) * self.borderline_space / 45,
self.borderline_space,
height*0.01+ k*0.25]
begin_point2 = [self.borderline_space,
self.borderline_space - i * self.borderline_space / 45,
height*0.01+ k*0.25]
end_point2 = [self.borderline_space,
self.borderline_space - (i+1) * self.borderline_space / 45,
height*0.01+ k*0.25]
begin_point3 = [self.borderline_space - i * self.borderline_space / 45,
-self.borderline_space,
height*0.01+ k*0.25]
end_point3 = [self.borderline_space - (i+1) * self.borderline_space / 45,
-self.borderline_space,
height*0.01+ k*0.25]
begin_point4 = [-self.borderline_space ,
-self.borderline_space + i * self.borderline_space / 45,
height*0.01+ k*0.25]
end_point4 = [-self.borderline_space,
-self.borderline_space + (i+1) * self.borderline_space / 45,
height*0.01+ k*0.25]
lines1.append(begin_point1)
lines1.append(end_point1)
lines2.append(begin_point2)
lines2.append(end_point2)
lines3.append(begin_point3)
lines3.append(end_point3)
lines4.append(begin_point4)
lines4.append(end_point4)
lines.extend(lines1)
lines.extend(lines2)
lines.extend(lines3)
lines.extend(lines4)
lines = np.array(lines, dtype=np.float32)
colors = np.array([[1, 0, 0]] * int(len(lines) / 2), dtype=np.float32)
self.gym.add_lines(self.viewer, env, int(len(lines) / 2), lines, colors)
def allocate_buffers(self):
    """Allocate per-step observation, reward, reset, and bookkeeping tensors."""
    n_envs = self.num_envs
    n_total = self.num_agents * n_envs
    self.obs_buf = torch.zeros((n_total, self.num_obs), device=self.device, dtype=torch.float)
    self.states_buf = torch.zeros((n_envs, self.num_states), device=self.device, dtype=torch.float)
    self.rew_buf = torch.zeros(n_envs, device=self.device, dtype=torch.float)
    self.reset_buf = torch.ones(n_envs, device=self.device, dtype=torch.long)
    self.timeout_buf = torch.zeros(n_envs, device=self.device, dtype=torch.long)
    self.progress_buf = torch.zeros(n_envs, device=self.device, dtype=torch.long)
    self.randomize_buf = torch.zeros(n_total, device=self.device, dtype=torch.long)
    # Match-outcome flags, one set per opponent pairing.
    n_results = (self.num_agents - 1) * n_envs
    self.extras = {
        'win': torch.zeros((n_results,), device=self.device, dtype=torch.bool),
        'lose': torch.zeros((n_results,), device=self.device, dtype=torch.bool),
        'draw': torch.zeros((n_results,), device=self.device, dtype=torch.bool)}
    # Cached unit axes, repeated for both agents of every env.
    self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((2 * n_envs, 1))
    self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((2 * n_envs, 1))
    self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((2 * n_envs, 1))
def create_sim(self):
    """Create the simulator, then the ground plane and the env grid."""
    self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, 'z')
    self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)
    self._create_ground_plane()
    # Lay envs out in a roughly square grid.
    num_per_row = int(np.sqrt(self.num_envs))
    self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], num_per_row)
def reset(self, env_ids=None):
    """Reset the given envs; with ``env_ids=None``, reset every env."""
    if env_ids is None:
        env_ids = to_torch(np.arange(self.num_envs), device=self.device, dtype=torch.long)
    self._reset_envs(env_ids)
def set_char_color(self, col, env_ids):
    """Tint every rigid body of the primary humanoid in each listed env."""
    color = gymapi.Vec3(col[0], col[1], col[2])
    for env_id in env_ids:
        env_ptr = self.envs[env_id]
        actor = self.humanoid_handles[env_id]
        for body_id in range(self.num_bodies):
            self.gym.set_rigid_body_color(env_ptr, actor, body_id, gymapi.MESH_VISUAL, color)
    return
def _reset_envs(self, env_ids):
if (len(env_ids) > 0):
self._reset_actors(env_ids)
self._reset_env_tensors(env_ids)
self._refresh_sim_tensors()
self._compute_observations()
return
def _reset_env_tensors(self, env_ids):
    """Push the (already rewritten) root/DOF state to the simulator for both
    agents in env_ids, and restart the episode bookkeeping for those envs.

    Indexed tensor writes require sim-domain actor indices as int32, so the
    ego and opponent humanoid indices are concatenated and cast.
    """
    # env_ids_int32 = self._humanoid_actor_ids[env_ids]
    env_ids_int32 = (torch.cat((self.humanoid_indices[env_ids],
                                self.humanoid_indices_op[env_ids]))).to(dtype=torch.int32)
    self.gym.set_actor_root_state_tensor_indexed(self.sim,
                                                 gymtorch.unwrap_tensor(self._root_states),
                                                 gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
    self.gym.set_dof_state_tensor_indexed(self.sim,
                                          gymtorch.unwrap_tensor(self._dof_state),
                                          gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
    # Zero out progress/reset/terminate flags so the fresh episode starts clean.
    self.progress_buf[env_ids] = 0
    self.reset_buf[env_ids] = 0
    self._terminate_buf[env_ids] = 0
    return
def _create_ground_plane(self):
    """Add a flat z-up ground plane with the configured friction/restitution."""
    params = gymapi.PlaneParams()
    params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
    params.static_friction = self.plane_static_friction
    params.dynamic_friction = self.plane_dynamic_friction
    params.restitution = self.plane_restitution
    self.gym.add_ground(self.sim, params)
def _setup_character_props(self, key_bodies):
asset_file = self.cfg["env"]["asset"]["assetFileName"]
num_key_bodies = len(key_bodies)
if (asset_file == "mjcf/amp_humanoid.xml"):
self._dof_body_ids = [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14]
self._dof_offsets = [0, 3, 6, 9, 10, 13, 14, 17, 18, 21, 24, 25, 28]
self._dof_obs_size = 72
self._num_actions = 28
self._num_obs = 1 + 15 * (3 + 6 + 3 + 3) - 3
elif (asset_file == "mjcf/amp_humanoid_sword_shield.xml"):
self._dof_body_ids = [1, 2, 3, 4, 5, 7, 8, 11, 12, 13, 14, 15, 16]
self._dof_offsets = [0, 3, 6, 9, 10, 13, 16, 17, 20, 21, 24, 27, 28, 31]
self._dof_obs_size = 78
self._num_actions = 31
self._num_obs = 1 + 17 * (3 + 6 + 3 + 3) - 3
else:
print("Unsupported character config file: {s}".format(asset_file))
assert(False)
return
def _build_termination_heights(self):
    """Build per-body fall-height thresholds; the head (and the shield arm
    for the sword-and-shield asset) gets a stricter minimum."""
    head_term_height = 0.3
    shield_term_height = 0.32
    default_height = self.cfg["env"]["terminationHeight"]
    heights = np.array([default_height] * self.num_bodies)
    head_id = self.gym.find_actor_rigid_body_handle(self.envs[0], self.humanoid_handles[0], "head")
    heights[head_id] = max(head_term_height, heights[head_id])
    asset_file = self.cfg["env"]["asset"]["assetFileName"]
    if asset_file == "mjcf/amp_humanoid_sword_shield.xml":
        left_arm_id = self.gym.find_actor_rigid_body_handle(self.envs[0], self.humanoid_handles[0], "left_lower_arm")
        heights[left_arm_id] = max(shield_term_height, heights[left_arm_id])
    self._termination_heights = to_torch(heights, device=self.device)
def _create_envs(self, num_envs, spacing, num_per_row):
    """Load the humanoid asset twice (ego and opponent), attach foot force
    sensors, create per-env actors, and cache DOF limits and actor indices."""
    lower = gymapi.Vec3(-spacing, -spacing, 0.0)
    upper = gymapi.Vec3(spacing, spacing, spacing)
    asset_root = self.cfg["env"]["asset"]["assetRoot"]
    asset_file = self.cfg["env"]["asset"]["assetFileName"]
    asset_path = os.path.join(asset_root, asset_file)
    asset_root = os.path.dirname(asset_path)
    asset_file = os.path.basename(asset_path)
    asset_options = gymapi.AssetOptions()
    asset_options.angular_damping = 0.01
    asset_options.max_angular_velocity = 100.0
    asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE
    #asset_options.fix_base_link = True
    # Same asset file loaded twice so each agent gets its own sensors.
    humanoid_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options)
    humanoid_asset_op = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options)
    actuator_props = self.gym.get_asset_actuator_properties(humanoid_asset)
    motor_efforts = [prop.motor_effort for prop in actuator_props]
    # create force sensors at the feet
    right_foot_idx = self.gym.find_asset_rigid_body_index(humanoid_asset, "right_foot")
    left_foot_idx = self.gym.find_asset_rigid_body_index(humanoid_asset, "left_foot")
    # op
    right_foot_idx_op = self.gym.find_asset_rigid_body_index(humanoid_asset_op, "right_foot")
    left_foot_idx_op = self.gym.find_asset_rigid_body_index(humanoid_asset_op, "left_foot")
    sensor_pose = gymapi.Transform()
    sensor_pose_op = gymapi.Transform()
    self.gym.create_asset_force_sensor(humanoid_asset, right_foot_idx, sensor_pose)
    self.gym.create_asset_force_sensor(humanoid_asset, left_foot_idx, sensor_pose)
    # op
    self.gym.create_asset_force_sensor(humanoid_asset_op, right_foot_idx_op, sensor_pose_op)
    self.gym.create_asset_force_sensor(humanoid_asset_op, left_foot_idx_op, sensor_pose_op)
    self.max_motor_effort = max(motor_efforts)
    self.motor_efforts = to_torch(motor_efforts, device=self.device)
    self.torso_index = 0
    # 17 bodies
    self.num_bodies = self.gym.get_asset_rigid_body_count(humanoid_asset)
    # 31 dofs
    self.num_dof = self.gym.get_asset_dof_count(humanoid_asset)
    # 34 joints
    self.num_joints = self.gym.get_asset_joint_count(humanoid_asset)
    self.humanoid_handles = []
    self.humanoid_handles_op = []
    self.humanoid_indices = []
    self.humanoid_indices_op = []
    self.envs = []
    self.dof_limits_lower = []
    self.dof_limits_upper = []
    for i in range(self.num_envs):
        # create env instance
        env_ptr = self.gym.create_env(self.sim, lower, upper, num_per_row)
        self._build_env(i, env_ptr, humanoid_asset, humanoid_asset_op)
        self.envs.append(env_ptr)
    # DOF limits are read once from the first actor (shared across envs).
    dof_prop = self.gym.get_actor_dof_properties(self.envs[0], self.humanoid_handles[0])
    for j in range(self.num_dof):
        # Some assets store limits swapped; normalize so lower <= upper.
        if dof_prop['lower'][j] > dof_prop['upper'][j]:
            self.dof_limits_lower.append(dof_prop['upper'][j])
            self.dof_limits_upper.append(dof_prop['lower'][j])
        else:
            self.dof_limits_lower.append(dof_prop['lower'][j])
            self.dof_limits_upper.append(dof_prop['upper'][j])
    self.dof_limits_lower = to_torch(self.dof_limits_lower, device=self.device)
    self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device)
    self.humanoid_indices = to_torch(self.humanoid_indices, dtype=torch.long, device=self.device)
    self.humanoid_indices_op = to_torch(self.humanoid_indices_op, dtype=torch.long, device=self.device)
    if (self._pd_control):
        self._build_pd_action_offset_scale()
    return
def _build_env(self, env_id, env_ptr, humanoid_asset, humanoid_asset_op):
    """Create the ego and opponent humanoid actors for one env, spawned at
    opposite corners of the arena, and record their handles/sim indices."""
    col_group = env_id
    col_filter = self._get_humanoid_collision_filter()
    segmentation_id = 0
    start_pose = gymapi.Transform()
    start_pose_op = gymapi.Transform()
    # asset_file = self.cfg["env"]["asset"]["assetFileName"]
    # char_h = 0.89
    # Spawn 2 units inside opposite corners; 0.89 is the spawn root height.
    start_pose.p = gymapi.Vec3(-self.borderline_space + 2, -self.borderline_space + 2, 0.89)
    start_pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1.0)
    start_pose_op.p = gymapi.Vec3(self.borderline_space - 2, self.borderline_space - 2, 0.89)
    # start_pose_op.p = gymapi.Vec3(0, 0, 0.89)
    start_pose_op.r = gymapi.Quat(0.0, 0.0, 0.0, 1.0)
    humanoid_handle = self.gym.create_actor(env_ptr, humanoid_asset, start_pose, "humanoid", col_group, col_filter, segmentation_id)
    humanoid_index = self.gym.get_actor_index(env_ptr, humanoid_handle, gymapi.DOMAIN_SIM)
    humanoid_handle_op = self.gym.create_actor(env_ptr, humanoid_asset_op, start_pose_op, "humanoid", col_group, col_filter, segmentation_id)
    humanoid_index_op = self.gym.get_actor_index(env_ptr, humanoid_handle_op, gymapi.DOMAIN_SIM)
    self.gym.enable_actor_dof_force_sensors(env_ptr, humanoid_handle)
    self.gym.enable_actor_dof_force_sensors(env_ptr, humanoid_handle_op)
    # Distinct tints: green-ish ego, orange opponent.
    for j in range(self.num_bodies):
        self.gym.set_rigid_body_color(env_ptr, humanoid_handle, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.54, 0.85, 0.2))
        self.gym.set_rigid_body_color(env_ptr, humanoid_handle_op, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.97, 0.38, 0.06))
    if (self._pd_control):
        # Switch both actors' DOFs to position-drive mode for PD control.
        dof_prop = self.gym.get_asset_dof_properties(humanoid_asset)
        dof_prop["driveMode"] = gymapi.DOF_MODE_POS
        self.gym.set_actor_dof_properties(env_ptr, humanoid_handle, dof_prop)
        dof_prop_op = self.gym.get_asset_dof_properties(humanoid_asset_op)
        dof_prop_op["driveMode"] = gymapi.DOF_MODE_POS
        self.gym.set_actor_dof_properties(env_ptr, humanoid_handle_op, dof_prop_op)
    self.humanoid_handles.append(humanoid_handle)
    self.humanoid_indices.append(humanoid_index)
    self.humanoid_handles_op.append(humanoid_handle_op)
    self.humanoid_indices_op.append(humanoid_index_op)
    return
def _build_pd_action_offset_scale(self):
    """Derive the affine map from normalized actions to PD position targets.

    For 3-DOF (spherical) joints the range is made symmetric, widened by
    20% and clamped to pi; for 1-DOF (revolute) joints the range is widened
    by 40% around its midpoint. The result is stored as per-DOF
    ``_pd_action_offset`` / ``_pd_action_scale`` tensors.
    """
    num_joints = len(self._dof_offsets) - 1
    lim_low = self.dof_limits_lower.cpu().numpy()
    lim_high = self.dof_limits_upper.cpu().numpy()
    for j in range(num_joints):
        dof_offset = self._dof_offsets[j]
        dof_size = self._dof_offsets[j + 1] - self._dof_offsets[j]
        if (dof_size == 3):
            # Spherical joint: symmetric range from the largest limit magnitude.
            curr_low = lim_low[dof_offset:(dof_offset + dof_size)]
            curr_high = lim_high[dof_offset:(dof_offset + dof_size)]
            curr_low = np.max(np.abs(curr_low))
            curr_high = np.max(np.abs(curr_high))
            curr_scale = max([curr_low, curr_high])
            curr_scale = 1.2 * curr_scale
            curr_scale = min([curr_scale, np.pi])
            lim_low[dof_offset:(dof_offset + dof_size)] = -curr_scale
            lim_high[dof_offset:(dof_offset + dof_size)] = curr_scale
            #lim_low[dof_offset:(dof_offset + dof_size)] = -np.pi
            #lim_high[dof_offset:(dof_offset + dof_size)] = np.pi
        elif (dof_size == 1):
            curr_low = lim_low[dof_offset]
            curr_high = lim_high[dof_offset]
            curr_mid = 0.5 * (curr_high + curr_low)
            # extend the action range to be a bit beyond the joint limits so that the motors
            # don't lose their strength as they approach the joint limits
            curr_scale = 0.7 * (curr_high - curr_low)
            curr_low = curr_mid - curr_scale
            curr_high = curr_mid + curr_scale
            lim_low[dof_offset] = curr_low
            lim_high[dof_offset] = curr_high
    # Targets = offset + scale * action, i.e. range midpoint and half-width.
    self._pd_action_offset = 0.5 * (lim_high + lim_low)
    self._pd_action_scale = 0.5 * (lim_high - lim_low)
    self._pd_action_offset = to_torch(self._pd_action_offset, device=self.device)
    self._pd_action_scale = to_torch(self._pd_action_scale, device=self.device)
    return
def _get_humanoid_collision_filter(self):
return 0
def _compute_reward(self, actions):
    """Fill ``rew_buf`` in place with the per-env reward for this step."""
    self.rew_buf[:] = compute_humanoid_reward(self.obs_buf)
def _compute_reset(self):
    """Update reset/terminate buffers from contacts, body heights and episode time."""
    reset, terminated = compute_humanoid_reset(self.reset_buf, self.progress_buf,
                                               self._contact_forces, self._contact_body_ids,
                                               self._rigid_body_pos, self.max_episode_length,
                                               self._enable_early_termination,
                                               self._termination_heights)
    self.reset_buf[:] = reset
    self._terminate_buf[:] = terminated
def _refresh_sim_tensors(self):
self.gym.refresh_dof_state_tensor(self.sim)
self.gym.refresh_actor_root_state_tensor(self.sim)
self.gym.refresh_rigid_body_state_tensor(self.sim)
self.gym.refresh_force_sensor_tensor(self.sim)
self.gym.refresh_dof_force_tensor(self.sim)
self.gym.refresh_net_contact_force_tensor(self.sim)
return
def _compute_observations(self):
obs, obs_op = self._compute_humanoid_obs()
self.obs_buf[:self.num_envs] = obs
self.obs_buf[self.num_envs:] = obs_op
return
def _compute_humanoid_obs(self):
    """Build max-coordinate observations for the ego and opponent humanoids.

    Returns an (obs, obs_op) pair of per-env observation tensors.
    """
    obs = compute_humanoid_observations_max(self._rigid_body_pos, self._rigid_body_rot,
                                            self._rigid_body_vel, self._rigid_body_ang_vel,
                                            self._local_root_obs, self._root_height_obs)
    obs_op = compute_humanoid_observations_max(self._rigid_body_pos_op, self._rigid_body_rot_op,
                                               self._rigid_body_vel_op, self._rigid_body_ang_vel_op,
                                               self._local_root_obs, self._root_height_obs)
    return obs, obs_op
def _reset_actors(self, env_ids):
    """Restore spawn root states and initial DOF state for both agents of env_ids."""
    # Root-state rows are per-actor (two agents per env), so expand the ids.
    agent_env_ids = expand_env_ids(env_ids, 2)
    self._humanoid_root_states[agent_env_ids] = self._initial_humanoid_root_states[agent_env_ids]
    for dst, src in ((self._dof_pos, self._initial_dof_pos),
                     (self._dof_vel, self._initial_dof_vel),
                     (self._dof_pos_op, self._initial_dof_pos_op),
                     (self._dof_vel_op, self._initial_dof_vel_op)):
        dst[env_ids] = src[env_ids]
    return
def pre_physics_step(self, actions):
    """Convert policy actions into PD position targets or direct torques.

    Rows [:num_envs] are the ego agent's actions, the rest the opponent's.
    """
    self.actions = actions.to(self.device).clone()
    ego_actions = self.actions[:self.num_envs]
    op_actions = self.actions[self.num_envs:]
    if self._pd_control:
        # Both agents' PD targets are concatenated along the DOF dimension.
        pd_targets = torch.cat([self._action_to_pd_targets(ego_actions),
                                self._action_to_pd_targets(op_actions)], dim=-1)
        self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(pd_targets))
    else:
        forces = self.actions * self.motor_efforts.unsqueeze(0) * self.power_scale
        self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(forces))
    return
def post_physics_step(self):
    """Per-step bookkeeping after physics: refresh state, rebuild observations,
    compute rewards and resets, and expose the terminate flags via extras."""
    self.progress_buf += 1
    self._refresh_sim_tensors()
    self._compute_observations()
    self._compute_reward(self.actions)
    self._compute_reset()
    self.extras["terminate"] = self._terminate_buf
    # Optional per-step visualization.
    if self.viewer and self.debug_viz:
        self._update_debug_viz()
def render(self, sync_frame_time=False):
    """Delegate rendering to the base class."""
    super().render(sync_frame_time)
def _build_key_body_ids_tensor(self, key_body_names):
    """Resolve key-body names to rigid-body handle ids (long tensor)."""
    env_ptr = self.envs[0]
    actor = self.humanoid_handles[0]
    ids = []
    for name in key_body_names:
        handle = self.gym.find_actor_rigid_body_handle(env_ptr, actor, name)
        assert(handle != -1)
        ids.append(handle)
    return to_torch(ids, device=self.device, dtype=torch.long)
def _build_contact_body_ids_tensor(self, contact_body_names):
env_ptr = self.envs[0]
actor_handle = self.humanoid_handles[0]
body_ids = []
for body_name in contact_body_names:
body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name)
assert(body_id != -1)
body_ids.append(body_id)
body_ids = to_torch(body_ids, device=self.device, dtype=torch.long)
return body_ids
def _action_to_pd_targets(self, action):
pd_tar = self._pd_action_offset + self._pd_action_scale * action
return pd_tar
def _update_debug_viz(self):
self.gym.clear_lines(self.viewer)
return
#####################################################################
###=========================jit functions=========================###
#####################################################################
@torch.jit.script
def dof_to_obs(pose, dof_obs_size, dof_offsets):
    # type: (Tensor, int, List[int]) -> Tensor
    """Encode raw DOF angles as a 6-value rotation observation per joint."""
    joint_obs_size = 6
    num_joints = len(dof_offsets) - 1
    dof_obs_shape = pose.shape[:-1] + (dof_obs_size,)
    dof_obs = torch.zeros(dof_obs_shape, device=pose.device)
    dof_obs_offset = 0
    for j in range(num_joints):
        dof_offset = dof_offsets[j]
        dof_size = dof_offsets[j + 1] - dof_offsets[j]
        joint_pose = pose[:, dof_offset:(dof_offset + dof_size)]
        # assume this is a spherical joint
        if (dof_size == 3):
            # 3-DOF joints store an exponential-map rotation.
            joint_pose_q = torch_utils.exp_map_to_quat(joint_pose)
        elif (dof_size == 1):
            # 1-DOF joints rotate about the local y axis.
            axis = torch.tensor([0.0, 1.0, 0.0], dtype=joint_pose.dtype, device=pose.device)
            joint_pose_q = quat_from_angle_axis(joint_pose[..., 0], axis)
        else:
            joint_pose_q = None
            assert(False), "Unsupported joint type"
        joint_dof_obs = torch_utils.quat_to_tan_norm(joint_pose_q)
        dof_obs[:, (j * joint_obs_size):((j + 1) * joint_obs_size)] = joint_dof_obs
    assert((num_joints * joint_obs_size) == dof_obs_size)
    return dof_obs
@torch.jit.script
def compute_humanoid_observations(root_pos, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos,
                                  local_root_obs, root_height_obs, dof_obs_size, dof_offsets):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, bool, bool, int, List[int]) -> Tensor
    """Build a reduced-coordinate observation: root height/rotation/velocities
    in the heading-aligned frame, DOF obs, DOF velocities and key-body positions."""
    root_h = root_pos[:, 2:3]
    # Inverse heading rotation maps world-frame quantities into a frame
    # aligned with the character's facing direction.
    heading_rot = torch_utils.calc_heading_quat_inv(root_rot)
    if (local_root_obs):
        root_rot_obs = quat_mul(heading_rot, root_rot)
    else:
        root_rot_obs = root_rot
    root_rot_obs = torch_utils.quat_to_tan_norm(root_rot_obs)
    if (not root_height_obs):
        root_h_obs = torch.zeros_like(root_h)
    else:
        root_h_obs = root_h
    local_root_vel = quat_rotate(heading_rot, root_vel)
    local_root_ang_vel = quat_rotate(heading_rot, root_ang_vel)
    # Key-body positions relative to the root, rotated into the heading frame.
    root_pos_expand = root_pos.unsqueeze(-2)
    local_key_body_pos = key_body_pos - root_pos_expand
    heading_rot_expand = heading_rot.unsqueeze(-2)
    heading_rot_expand = heading_rot_expand.repeat((1, local_key_body_pos.shape[1], 1))
    flat_end_pos = local_key_body_pos.view(local_key_body_pos.shape[0] * local_key_body_pos.shape[1], local_key_body_pos.shape[2])
    flat_heading_rot = heading_rot_expand.view(heading_rot_expand.shape[0] * heading_rot_expand.shape[1],
                                               heading_rot_expand.shape[2])
    local_end_pos = quat_rotate(flat_heading_rot, flat_end_pos)
    flat_local_key_pos = local_end_pos.view(local_key_body_pos.shape[0], local_key_body_pos.shape[1] * local_key_body_pos.shape[2])
    dof_obs = dof_to_obs(dof_pos, dof_obs_size, dof_offsets)
    obs = torch.cat((root_h_obs, root_rot_obs, local_root_vel, local_root_ang_vel, dof_obs, dof_vel, flat_local_key_pos), dim=-1)
    return obs
@torch.jit.script
def compute_humanoid_observations_max(body_pos, body_rot, body_vel, body_ang_vel, local_root_obs, root_height_obs):
    # type: (Tensor, Tensor, Tensor, Tensor, bool, bool) -> Tensor
    """Build a max-coordinate observation: root height plus all per-body
    positions, rotations and (angular) velocities in the heading-aligned frame."""
    root_pos = body_pos[:, 0, :]  # 0: pelvis, root
    root_rot = body_rot[:, 0, :]
    root_h = root_pos[:, 2:3]  # 1. Height of the root from the ground
    heading_rot = torch_utils.calc_heading_quat_inv(root_rot)
    if (not root_height_obs):
        root_h_obs = torch.zeros_like(root_h)
    else:
        root_h_obs = root_h
    heading_rot_expand = heading_rot.unsqueeze(-2)  # num_envs, 1, 4
    # num_envs, body_pos.shape[1], 4
    heading_rot_expand = heading_rot_expand.repeat((1, body_pos.shape[1], 1))
    flat_heading_rot = heading_rot_expand.reshape(heading_rot_expand.shape[0] * heading_rot_expand.shape[1],
                                                  heading_rot_expand.shape[2])
    # Body positions relative to the root, rotated into the heading frame.
    root_pos_expand = root_pos.unsqueeze(-2)
    local_body_pos = body_pos - root_pos_expand
    flat_local_body_pos = local_body_pos.reshape(local_body_pos.shape[0] * local_body_pos.shape[1], local_body_pos.shape[2])
    flat_local_body_pos = quat_rotate(flat_heading_rot, flat_local_body_pos)
    local_body_pos = flat_local_body_pos.reshape(local_body_pos.shape[0], local_body_pos.shape[1] * local_body_pos.shape[2])
    local_body_pos = local_body_pos[..., 3:]  # remove root pos
    # Per-body rotations expressed relative to the heading frame.
    flat_body_rot = body_rot.reshape(body_rot.shape[0] * body_rot.shape[1], body_rot.shape[2])
    flat_local_body_rot = quat_mul(flat_heading_rot, flat_body_rot)
    flat_local_body_rot_obs = torch_utils.quat_to_tan_norm(flat_local_body_rot)
    local_body_rot_obs = flat_local_body_rot_obs.reshape(body_rot.shape[0], body_rot.shape[1] * flat_local_body_rot_obs.shape[1])
    if (local_root_obs):
        # Overwrite the root entry with its unrotated (world) rotation encoding.
        root_rot_obs = torch_utils.quat_to_tan_norm(root_rot)
        local_body_rot_obs[..., 0:6] = root_rot_obs
    flat_body_vel = body_vel.reshape(body_vel.shape[0] * body_vel.shape[1], body_vel.shape[2])
    flat_local_body_vel = quat_rotate(flat_heading_rot, flat_body_vel)
    local_body_vel = flat_local_body_vel.reshape(body_vel.shape[0], body_vel.shape[1] * body_vel.shape[2])
    flat_body_ang_vel = body_ang_vel.reshape(body_ang_vel.shape[0] * body_ang_vel.shape[1], body_ang_vel.shape[2])
    flat_local_body_ang_vel = quat_rotate(flat_heading_rot, flat_body_ang_vel)
    local_body_ang_vel = flat_local_body_ang_vel.reshape(body_ang_vel.shape[0], body_ang_vel.shape[1] * body_ang_vel.shape[2])
    obs = torch.cat((root_h_obs, local_body_pos, local_body_rot_obs, local_body_vel, local_body_ang_vel), dim=-1)
    return obs
@torch.jit.script
def expand_env_ids(env_ids, n_agents):
    # type: (Tensor, int) -> Tensor
    """Expand env indices to per-agent actor indices: env e yields
    e * n_agents + 0 .. e * n_agents + (n_agents - 1), interleaved per env."""
    offsets = torch.arange(n_agents, device=env_ids.device)
    flat = env_ids.unsqueeze(-1) * n_agents + offsets
    return flat.reshape(-1).to(dtype=torch.long)
@torch.jit.script
def compute_humanoid_reward(obs_buf):
    # type: (Tensor) -> Tensor
    """Constant survival reward of 1 per agent per step."""
    return torch.ones_like(obs_buf[:, 0])
@torch.jit.script
def compute_humanoid_reset(reset_buf, progress_buf, contact_buf, contact_body_ids, rigid_body_pos,
                           max_episode_length, enable_early_termination, termination_heights):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, float, bool, Tensor) -> Tuple[Tensor, Tensor]
    """Compute per-env (reset, terminated) flags.

    An env terminates early when some non-whitelisted body both registers a
    contact force and sits below its termination height; every env resets on
    episode timeout regardless.
    """
    terminated = torch.zeros_like(reset_buf)
    if enable_early_termination:
        # Ignore contacts on bodies that are allowed to touch the ground.
        contacts = contact_buf.clone()
        contacts[:, contact_body_ids, :] = 0
        contact_fall = torch.any(torch.any(torch.abs(contacts) > 0.1, dim=-1), dim=-1)
        heights = rigid_body_pos[..., 2]
        low_body = heights < termination_heights
        low_body[:, contact_body_ids] = False
        height_fall = torch.any(low_body, dim=-1)
        has_fallen = torch.logical_and(contact_fall, height_fall)
        # The first timestep can still carry nonzero contact forces, so only
        # allow early termination after the first couple of steps.
        has_fallen = torch.logical_and(has_fallen, progress_buf > 1)
        terminated = torch.where(has_fallen, torch.ones_like(reset_buf), terminated)
    reset = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(reset_buf), terminated)
    return reset, terminated
================================================
FILE: timechamber/tasks/ase_humanoid_base/humanoid_amp.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from enum import Enum
import numpy as np
import torch
from isaacgym import gymapi
from isaacgym import gymtorch
from timechamber.tasks.ase_humanoid_base.humanoid import Humanoid, dof_to_obs
from timechamber.utils import gym_util
from timechamber.utils.motion_lib import MotionLib
from isaacgym.torch_utils import *
from utils import torch_utils
class HumanoidAMP(Humanoid):
    """Humanoid environment augmented with AMP (Adversarial Motion Prior) observations.

    Maintains, per env, a rolling window of the last ``numAMPObsSteps``
    per-frame discriminator observations (published through
    ``extras["amp_obs"]``), and samples matching observation windows from a
    reference-motion library (``fetch_amp_obs_demo``) to serve as the
    discriminator's "real" data.
    """

    class StateInit(Enum):
        # Episode reset strategies.
        Default = 0  # default initial pose from the base Humanoid
        Start = 1    # start (t=0) of a sampled reference motion
        Random = 2   # random time within a sampled reference motion
        Hybrid = 3   # per-env Bernoulli mix of Default and reference init

    def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
        """Read AMP config, build the base humanoid, then allocate AMP buffers."""
        state_init = cfg["env"]["stateInit"]
        self._state_init = HumanoidAMP.StateInit[state_init]
        self._hybrid_init_prob = cfg["env"]["hybridInitProb"]
        self._num_amp_obs_steps = cfg["env"]["numAMPObsSteps"]
        # Need at least one history frame besides the current one.
        assert (self._num_amp_obs_steps >= 2)

        # Filled in by the reset helpers so _init_amp_obs() knows which envs
        # were reset with which strategy.
        self._reset_default_env_ids = []
        self._reset_ref_env_ids = []

        super().__init__(cfg=cfg,
                         sim_params=sim_params,
                         physics_engine=physics_engine,
                         device_type=device_type,
                         device_id=device_id,
                         headless=headless)

        motion_file = cfg['env']['motion_file']
        self._load_motion(motion_file)

        # (num_envs, steps, obs_per_step); slot 0 holds the newest frame,
        # slots 1: the history (both are views into the same storage).
        self._amp_obs_buf = torch.zeros((self.num_envs, self._num_amp_obs_steps, self._num_amp_obs_per_step),
                                        device=self.device, dtype=torch.float)
        self._curr_amp_obs_buf = self._amp_obs_buf[:, 0]
        self._hist_amp_obs_buf = self._amp_obs_buf[:, 1:]

        # Allocated lazily on the first fetch_amp_obs_demo() call.
        self._amp_obs_demo_buf = None
        return

    def post_physics_step(self):
        """Step bookkeeping, then roll the AMP window and publish it."""
        super().post_physics_step()
        self._update_hist_amp_obs()
        self._compute_amp_observations()

        amp_obs_flat = self._amp_obs_buf.view(-1, self.get_num_amp_obs())
        self.extras["amp_obs"] = amp_obs_flat
        return

    def get_num_amp_obs(self):
        """Size of one flattened AMP observation window."""
        return self._num_amp_obs_steps * self._num_amp_obs_per_step

    def fetch_amp_obs_demo(self, num_samples):
        """Sample ``num_samples`` flattened demo observation windows.

        The demo buffer is sized on first use; subsequent calls must request
        the same number of samples.
        """
        if (self._amp_obs_demo_buf is None):
            self._build_amp_obs_demo_buf(num_samples)
        else:
            assert (self._amp_obs_demo_buf.shape[0] == num_samples)

        motion_ids = self._motion_lib.sample_motions(num_samples)
        motion_times0 = self._motion_lib.sample_time(motion_ids)
        amp_obs_demo = self.build_amp_obs_demo(motion_ids, motion_times0)
        self._amp_obs_demo_buf[:] = amp_obs_demo.view(self._amp_obs_demo_buf.shape)
        amp_obs_demo_flat = self._amp_obs_demo_buf.view(-1, self.get_num_amp_obs())
        return amp_obs_demo_flat

    def build_amp_obs_demo(self, motion_ids, motion_times0):
        """Build per-frame demo observations for windows of
        ``_num_amp_obs_steps`` frames ending at ``motion_times0``,
        stepping backwards in time by the sim dt."""
        dt = self.dt

        motion_ids = torch.tile(motion_ids.unsqueeze(-1), [1, self._num_amp_obs_steps])
        motion_times = motion_times0.unsqueeze(-1)
        time_steps = -dt * torch.arange(0, self._num_amp_obs_steps, device=self.device)
        motion_times = motion_times + time_steps

        motion_ids = motion_ids.view(-1)
        motion_times = motion_times.view(-1)
        root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \
            = self._motion_lib.get_motion_state(motion_ids, motion_times)
        amp_obs_demo = build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel,
                                              dof_pos, dof_vel, key_pos,
                                              self._local_root_obs, self._root_height_obs,
                                              self._dof_obs_size, self._dof_offsets)
        return amp_obs_demo

    def _build_amp_obs_demo_buf(self, num_samples):
        """Allocate the reusable demo observation buffer."""
        self._amp_obs_demo_buf = torch.zeros((num_samples, self._num_amp_obs_steps, self._num_amp_obs_per_step),
                                             device=self.device, dtype=torch.float32)
        return

    def _setup_character_props(self, key_bodies):
        """Extend the base character setup with the per-frame AMP obs width."""
        super()._setup_character_props(key_bodies)

        asset_file = self.cfg["env"]["asset"]["assetFileName"]
        num_key_bodies = len(key_bodies)

        if (asset_file == "mjcf/amp_humanoid.xml"):
            self._num_amp_obs_per_step = 13 + self._dof_obs_size + 28 + 3 * num_key_bodies  # [root_h, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos]
        elif (asset_file == "mjcf/amp_humanoid_sword_shield.xml"):
            self._num_amp_obs_per_step = 13 + self._dof_obs_size + 31 + 3 * num_key_bodies  # [root_h, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos]
        else:
            # Fixed: "{s}" is not a valid placeholder — .format() raised
            # KeyError here instead of printing the offending file name.
            print("Unsupported character config file: {:s}".format(asset_file))
            assert (False)

        return

    def _load_motion(self, motion_file):
        """Load the reference-motion library used for resets and demo sampling."""
        assert (self._dof_offsets[-1] == self.num_dof)
        self._motion_lib = MotionLib(motion_file=motion_file,
                                     dof_body_ids=self._dof_body_ids,
                                     dof_offsets=self._dof_offsets,
                                     key_body_ids=self._key_body_ids.cpu().numpy(),
                                     device=self.device)
        return

    def _reset_envs(self, env_ids):
        """Reset envs, then (re)initialize their AMP observation windows."""
        self._reset_default_env_ids = []
        self._reset_ref_env_ids = []

        super()._reset_envs(env_ids)
        self._init_amp_obs(env_ids)
        return

    def _reset_actors(self, env_ids):
        """Dispatch to the reset strategy selected by ``stateInit``."""
        if (self._state_init == HumanoidAMP.StateInit.Default):
            self._reset_default(env_ids)
        elif (self._state_init == HumanoidAMP.StateInit.Start
              or self._state_init == HumanoidAMP.StateInit.Random):
            self._reset_ref_state_init(env_ids)
        elif (self._state_init == HumanoidAMP.StateInit.Hybrid):
            self._reset_hybrid_state_init(env_ids)
        else:
            assert (False), "Unsupported state initialization strategy: {:s}".format(str(self._state_init))
        return

    def _reset_default(self, env_ids):
        """Reset to the base humanoid's default initial state.

        NOTE(review): unlike the reference-state path, this does not record
        env_ids in ``self._reset_default_env_ids``, so
        ``_init_amp_obs_default`` never runs for these envs — confirm this
        is intentional.
        """
        super()._reset_actors(env_ids)
        return

    def _reset_ref_state_init(self, env_ids):
        """Reset envs to states sampled from the reference motion library."""
        num_envs = env_ids.shape[0]
        motion_ids = self._motion_lib.sample_motions(num_envs)

        if (self._state_init == HumanoidAMP.StateInit.Random
                or self._state_init == HumanoidAMP.StateInit.Hybrid):
            motion_times = self._motion_lib.sample_time(motion_ids)
        elif (self._state_init == HumanoidAMP.StateInit.Start):
            motion_times = torch.zeros(num_envs, device=self.device)
        else:
            assert (False), "Unsupported state initialization strategy: {:s}".format(str(self._state_init))

        root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \
            = self._motion_lib.get_motion_state(motion_ids, motion_times)

        self._set_env_state(env_ids=env_ids,
                            root_pos=root_pos,
                            root_rot=root_rot,
                            dof_pos=dof_pos,
                            root_vel=root_vel,
                            root_ang_vel=root_ang_vel,
                            dof_vel=dof_vel)

        # Remember what was sampled so _init_amp_obs_ref can rebuild history.
        self._reset_ref_env_ids = env_ids
        self._reset_ref_motion_ids = motion_ids
        self._reset_ref_motion_times = motion_times
        return

    def _reset_hybrid_state_init(self, env_ids):
        """Per-env Bernoulli choice between reference-state and default reset."""
        num_envs = env_ids.shape[0]
        ref_probs = to_torch(np.array([self._hybrid_init_prob] * num_envs), device=self.device)
        ref_init_mask = torch.bernoulli(ref_probs) == 1.0

        ref_reset_ids = env_ids[ref_init_mask]
        if (len(ref_reset_ids) > 0):
            self._reset_ref_state_init(ref_reset_ids)

        default_reset_ids = env_ids[torch.logical_not(ref_init_mask)]
        if (len(default_reset_ids) > 0):
            self._reset_default(default_reset_ids)
        return

    def _init_amp_obs(self, env_ids):
        """Fill the AMP window for freshly reset envs."""
        self._compute_amp_observations(env_ids)

        if (len(self._reset_default_env_ids) > 0):
            self._init_amp_obs_default(self._reset_default_env_ids)

        if (len(self._reset_ref_env_ids) > 0):
            self._init_amp_obs_ref(self._reset_ref_env_ids, self._reset_ref_motion_ids,
                                   self._reset_ref_motion_times)
        return

    def _init_amp_obs_default(self, env_ids):
        """History for default-reset envs: replicate the current frame."""
        curr_amp_obs = self._curr_amp_obs_buf[env_ids].unsqueeze(-2)
        self._hist_amp_obs_buf[env_ids] = curr_amp_obs
        return

    def _init_amp_obs_ref(self, env_ids, motion_ids, motion_times):
        """History for reference-reset envs: rebuild the preceding frames from
        the motion library, stepping backwards by the sim dt."""
        dt = self.dt
        # Fixed: without unsqueeze(-1) the 1-D ids tiled to shape
        # (1, n * (steps - 1)) and, after flattening, paired with the wrong
        # motion_times entries; this now mirrors build_amp_obs_demo.
        motion_ids = torch.tile(motion_ids.unsqueeze(-1), [1, self._num_amp_obs_steps - 1])
        motion_times = motion_times.unsqueeze(-1)
        time_steps = -dt * (torch.arange(0, self._num_amp_obs_steps - 1, device=self.device) + 1)
        motion_times = motion_times + time_steps

        motion_ids = motion_ids.view(-1)
        motion_times = motion_times.view(-1)
        root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \
            = self._motion_lib.get_motion_state(motion_ids, motion_times)
        amp_obs_demo = build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel,
                                              dof_pos, dof_vel, key_pos,
                                              self._local_root_obs, self._root_height_obs,
                                              self._dof_obs_size, self._dof_offsets)
        self._hist_amp_obs_buf[env_ids] = amp_obs_demo.view(self._hist_amp_obs_buf[env_ids].shape)
        return

    def _set_env_state(self, env_ids, root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel):
        """Write a sampled kinematic state into the simulator state tensors."""
        self._humanoid_root_states[env_ids, 0:3] = root_pos
        self._humanoid_root_states[env_ids, 3:7] = root_rot
        self._humanoid_root_states[env_ids, 7:10] = root_vel
        self._humanoid_root_states[env_ids, 10:13] = root_ang_vel

        self._dof_pos[env_ids] = dof_pos
        self._dof_vel[env_ids] = dof_vel
        return

    def _update_hist_amp_obs(self, env_ids=None):
        """Shift the AMP window one step: frame i is copied into slot i + 1."""
        if (env_ids is None):
            self._hist_amp_obs_buf[:] = self._amp_obs_buf[:, 0:(self._num_amp_obs_steps - 1)]
        else:
            self._hist_amp_obs_buf[env_ids] = self._amp_obs_buf[env_ids, 0:(self._num_amp_obs_steps - 1)]
        return

    def _compute_amp_observations(self, env_ids=None):
        """Write the current frame's AMP observation into window slot 0."""
        key_body_pos = self._rigid_body_pos[:, self._key_body_ids, :]
        if (env_ids is None):
            self._curr_amp_obs_buf[:] = build_amp_observations(self._rigid_body_pos[:, 0, :],
                                                               self._rigid_body_rot[:, 0, :],
                                                               self._rigid_body_vel[:, 0, :],
                                                               self._rigid_body_ang_vel[:, 0, :],
                                                               self._dof_pos, self._dof_vel, key_body_pos,
                                                               self._local_root_obs, self._root_height_obs,
                                                               self._dof_obs_size, self._dof_offsets)
        else:
            self._curr_amp_obs_buf[env_ids] = build_amp_observations(self._rigid_body_pos[env_ids][:, 0, :],
                                                                     self._rigid_body_rot[env_ids][:, 0, :],
                                                                     self._rigid_body_vel[env_ids][:, 0, :],
                                                                     self._rigid_body_ang_vel[env_ids][:, 0, :],
                                                                     self._dof_pos[env_ids], self._dof_vel[env_ids], key_body_pos[env_ids],
                                                                     self._local_root_obs, self._root_height_obs,
                                                                     self._dof_obs_size, self._dof_offsets)
        return
#####################################################################
###=========================jit functions=========================###
#####################################################################
@torch.jit.script
def build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos,
                           local_root_obs, root_height_obs, dof_obs_size, dof_offsets):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, bool, bool, int, List[int]) -> Tensor
    """Assemble one per-frame AMP discriminator observation per env.

    Concatenates [root_h, root_rot (tan-norm), root_vel, root_ang_vel,
    dof_obs, dof_vel, key_body_pos], with velocities and key-body positions
    expressed in the root's heading frame.
    """
    heading_rot = torch_utils.calc_heading_quat_inv(root_rot)

    # Root height, optionally masked to zero.
    root_h = root_pos[:, 2:3]
    root_h_obs = root_h if root_height_obs else torch.zeros_like(root_h)

    # Root orientation, optionally expressed relative to the heading frame,
    # encoded as a 6D tangent-normal representation.
    rot_for_obs = quat_mul(heading_rot, root_rot) if local_root_obs else root_rot
    root_rot_obs = torch_utils.quat_to_tan_norm(rot_for_obs)

    # Root velocities rotated into the heading frame.
    local_root_vel = quat_rotate(heading_rot, root_vel)
    local_root_ang_vel = quat_rotate(heading_rot, root_ang_vel)

    # Key-body positions relative to the root, rotated into the heading
    # frame; flattened batch-wise so quat_rotate sees a 2-D input.
    rel_key_pos = key_body_pos - root_pos.unsqueeze(-2)
    num_envs = rel_key_pos.shape[0]
    num_keys = rel_key_pos.shape[1]
    heading_per_key = heading_rot.unsqueeze(-2).repeat((1, num_keys, 1))
    rotated_key_pos = quat_rotate(heading_per_key.view(num_envs * num_keys, heading_per_key.shape[2]),
                                  rel_key_pos.view(num_envs * num_keys, rel_key_pos.shape[2]))
    flat_local_key_pos = rotated_key_pos.view(num_envs, num_keys * rel_key_pos.shape[2])

    dof_obs = dof_to_obs(dof_pos, dof_obs_size, dof_offsets)

    return torch.cat((root_h_obs, root_rot_obs, local_root_vel, local_root_ang_vel,
                      dof_obs, dof_vel, flat_local_key_pos), dim=-1)
================================================
FILE: timechamber/tasks/ase_humanoid_base/humanoid_amp_task.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch
import timechamber.tasks.ase_humanoid_base.humanoid_amp as humanoid_amp
class HumanoidAMPTask(humanoid_amp.HumanoidAMP):
    """HumanoidAMP variant with an additional goal/task observation channel.

    Subclasses implement the task specifics (``_compute_task_obs``,
    ``_compute_reward``, and optionally ``_update_task``, ``_reset_task``,
    ``_draw_task``); this base class wires them into the step loop and
    concatenates the task observations onto the humanoid observations when
    ``enableTaskObs`` is set.
    """

    def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
        self._enable_task_obs = cfg["env"]["enableTaskObs"]

        super().__init__(cfg=cfg,
                         sim_params=sim_params,
                         physics_engine=physics_engine,
                         device_type=device_type,
                         device_id=device_id,
                         headless=headless)
        return

    def get_obs_size(self):
        """Base humanoid obs width, plus the task obs width when enabled."""
        obs_size = super().get_obs_size()
        if (self._enable_task_obs):
            obs_size += self.get_task_obs_size()
        return obs_size

    def get_task_obs_size(self):
        # Subclasses report the width of their task observation vector.
        return 0

    def pre_physics_step(self, actions):
        super().pre_physics_step(actions)
        self._update_task()
        return

    def render(self, sync_frame_time=False):
        super().render(sync_frame_time)

        if self.viewer:
            self._draw_task()
        return

    def _update_task(self):
        # Hook: advance task state each step. No-op by default.
        return

    def _reset_envs(self, env_ids):
        super()._reset_envs(env_ids)
        self._reset_task(env_ids)
        return

    def _reset_task(self, env_ids):
        # Hook: reset task state for the given envs. No-op by default.
        return

    def _compute_observations(self):
        # _compute_humanoid_obs appears to return a (self obs, opponent obs)
        # pair — TODO confirm against the base class. Task observations are
        # appended to the self side only; both halves are written into the
        # stacked observation buffer.
        obs, obs_op = self._compute_humanoid_obs()
        if (self._enable_task_obs):
            task_obs = self._compute_task_obs(env_ids=None)
            obs = torch.cat([obs, task_obs], dim=-1)
        self.obs_buf[:self.num_envs] = obs
        self.obs_buf[self.num_envs:] = obs_op
        return

    def _compute_task_obs(self, env_ids=None):
        # Abstract: subclasses must provide the task observations.
        # (Was `return NotImplemented` — that singleton is the binary-op
        # sentinel and would have been silently concatenated into the obs.)
        raise NotImplementedError

    def _compute_reward(self, actions):
        # Abstract: subclasses must provide the task reward.
        raise NotImplementedError

    def _draw_task(self):
        # Hook: debug visualization. No-op by default.
        return
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/README.md
================================================
# poselib
`poselib` is a library for loading, manipulating, and retargeting skeleton poses and motions. It is separated into three modules: `poselib.core` for basic data loading and tensor operations, `poselib.skeleton` for higher-level skeleton operations, and `poselib.visualization` for displaying skeleton poses. This library is built on top of the PyTorch framework and requires data to be in PyTorch tensors.
## poselib.core
- `poselib.core.rotation3d`: A set of Torch JIT functions for computing quaternions, transforms, and rotation/transformation matrices.
- `quat_*` manipulate and create quaternions in [x, y, z, w] format (where w is the real component).
- `transform_*` handle 7D transforms in [quat, pos] format.
- `rot_matrix_*` handle 3x3 rotation matrices.
- `euclidean_*` handle 4x4 Euclidean transformation matrices.
- `poselib.core.tensor_utils`: Provides loading and saving functions for PyTorch tensors.
## poselib.skeleton
- `poselib.skeleton.skeleton3d`: Utilities for loading and manipulating skeleton poses, and retargeting poses to different skeletons.
- `SkeletonTree` is a class that stores a skeleton as a tree structure. This describes the skeleton topology and joints.
- `SkeletonState` describes the static state of a skeleton, and provides both global and local joint angles.
- `SkeletonMotion` describes a time-series of skeleton states and provides utilities for computing joint velocities.
## poselib.visualization
- `poselib.visualization.common`: Functions used for visualizing skeletons interactively in `matplotlib`.
- In SkeletonState visualization, use key `q` to quit window.
- In interactive SkeletonMotion visualization, you can use the following key commands:
- `w` - loop animation
- `x` - play/pause animation
- `z` - previous frame
- `c` - next frame
- `n` - quit window
## Key Features
Poselib provides several key features for working with animation data. We list some of the frequently used ones here, and provide instructions and examples on their usage.
### Importing from FBX
Poselib supports importing skeletal animation sequences from .fbx format into a SkeletonMotion representation. To use this functionality, you will need to first set up the Python FBX SDK on your machine using the following instructions.
This package is necessary to read data from fbx files, which is a proprietary file format owned by Autodesk. The latest FBX SDK tested was FBX SDK 2020.2.1 for Python 3.7, which can be found on the Autodesk website: https://www.autodesk.com/developer-network/platform-technologies/fbx-sdk-2020-2-1.
Follow the instructions at https://help.autodesk.com/view/FBX/2020/ENU/?guid=FBX_Developer_Help_scripting_with_python_fbx_installing_python_fbx_html for download, install, and copy/paste instructions for the FBX Python SDK.
This repo provides an example script `fbx_importer.py` that shows usage of importing a .fbx file. Note that `SkeletonMotion.from_fbx()` takes in an optional parameter `root_joint`, which can be used to specify a joint in the skeleton tree as the root joint. If `root_joint` is not specified, we will default to using the first node in the FBX scene that contains animation data.
### Importing from MJCF
MJCF is a robotics file format supported by Isaac Gym. For convenience, we provide an API for importing MJCF assets into SkeletonTree definitions to represent the skeleton topology. An example script `mjcf_importer.py` is provided to show usage of this.
This can be helpful if motion sequences need to be retargeted to your simulation skeleton that's been created in MJCF format. Importing the file to SkeletonTree format will allow you to generate T-poses or other retargeting poses that can be used for retargeting. We also show an example of creating a T-Pose for our AMP Humanoid asset in `generate_amp_humanoid_tpose.py`.
### Retargeting Motions
Retargeting motions is important when your source data uses skeletons that have different morphologies than your target skeletons. We provide APIs for performing retargeting of motion sequences in our SkeletonState and SkeletonMotion classes.
To use the retargeting API, users must provide the following information:
- source_motion: a SkeletonMotion npy representation of a motion sequence. The motion clip should use the same skeleton as the source T-Pose skeleton.
- target_motion_path: path to save the retargeted motion to
- source_tpose: a SkeletonState npy representation of the source skeleton in its T-Pose state
- target_tpose: a SkeletonState npy representation of the target skeleton in its T-Pose state (pose should match source T-Pose)
- joint_mapping: mapping of joint names from source to target
- rotation: root rotation offset from source to target skeleton (for transforming across different orientation axes), represented as a quaternion in XYZW order.
- scale: scale offset from source to target skeleton
We provide an example script `retarget_motion.py` to demonstrate usage of the retargeting API for the CMU Motion Capture Database. Note that the retargeting data for this script is stored in `data/configs/retarget_cmu_to_amp.json`.
Additionally, a SkeletonState T-Pose file and retargeting config file are also provided for the SFU Motion Capture Database. These can be found at `data/sfu_tpose.npy` and `data/configs/retarget_sfu_to_amp.json`.
### Documentation
We provide a description of the functions and classes available in poselib in the comments of the APIs. Please check them out for more details.
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/data/configs/retarget_cmu_to_amp.json
================================================
{
"source_motion": "data/01_01_cmu.npy",
"target_motion_path": "data/01_01_cmu_amp.npy",
"source_tpose": "data/cmu_tpose.npy",
"target_tpose": "data/amp_humanoid_tpose.npy",
"joint_mapping": {
"Hips": "pelvis",
"LeftUpLeg": "left_thigh",
"LeftLeg": "left_shin",
"LeftFoot": "left_foot",
"RightUpLeg": "right_thigh",
"RightLeg": "right_shin",
"RightFoot": "right_foot",
"Spine1": "torso",
"Head": "head",
"LeftArm": "left_upper_arm",
"LeftForeArm": "left_lower_arm",
"LeftHand": "left_hand",
"RightArm": "right_upper_arm",
"RightForeArm": "right_lower_arm",
"RightHand": "right_hand"
},
"rotation": [0, 0, 0.7071068, 0.7071068],
"scale": 0.056444,
"root_height_offset": 0.05,
"trim_frame_beg": 75,
"trim_frame_end": 372
}
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/data/configs/retarget_sfu_to_amp.json
================================================
{
"source_motion": "data/0005_Jogging001.npy",
"target_motion_path": "data/0005_Jogging001_amp.npy",
"source_tpose": "data/sfu_tpose.npy",
"target_tpose": "data/amp_humanoid_tpose.npy",
"joint_mapping": {
"Hips": "pelvis",
"LeftUpLeg": "left_thigh",
"LeftLeg": "left_shin",
"LeftFoot": "left_foot",
"RightUpLeg": "right_thigh",
"RightLeg": "right_shin",
"RightFoot": "right_foot",
"Spine1": "torso",
"Head": "head",
"LeftArm": "left_upper_arm",
"LeftForeArm": "left_lower_arm",
"LeftHand": "left_hand",
"RightArm": "right_upper_arm",
"RightForeArm": "right_lower_arm",
"RightHand": "right_hand"
},
"rotation": [0.5, 0.5, 0.5, 0.5],
"scale": 0.01,
"root_height_offset": 0.0,
"trim_frame_beg": 0,
"trim_frame_end": 100
}
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/fbx_importer.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import json

from poselib.skeleton.skeleton3d import SkeletonTree, SkeletonState, SkeletonMotion
from poselib.visualization.common import plot_skeleton_state, plot_skeleton_motion_interactive

# Location of the CMU mocap clip to convert.
source_fbx = "data/01_01_cmu.fbx"

# Parse the FBX animation into a SkeletonMotion. "Hips" is used as the
# skeleton root joint, and the clip is resampled at 60 fps.
imported_motion = SkeletonMotion.from_fbx(
    fbx_file_path=source_fbx,
    root_joint="Hips",
    fps=60
)

# Persist the converted clip as .npy for later retargeting.
imported_motion.to_file("data/01_01_cmu.npy")

# Play the imported clip back interactively as a sanity check.
plot_skeleton_motion_interactive(imported_motion)
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/generate_amp_humanoid_tpose.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch
from poselib.core.rotation3d import *
from poselib.skeleton.skeleton3d import SkeletonTree, SkeletonState
from poselib.visualization.common import plot_skeleton_state
"""
This scripts imports a MJCF XML file and converts the skeleton into a SkeletonTree format.
It then generates a zero rotation pose, and adjusts the pose into a T-Pose.
"""
# import MJCF file
xml_path = "../../../../assets/mjcf/amp_humanoid.xml"
skeleton = SkeletonTree.from_mjcf(xml_path)
# generate zero rotation pose
zero_pose = SkeletonState.zero_pose(skeleton)
# adjust pose into a T Pose
# NOTE(review): local_rotation appears to alias zero_pose's internal rotation
# tensor, so the in-place writes below mutate zero_pose directly — confirm
# against SkeletonState's implementation.
local_rotation = zero_pose.local_rotation
# Rotate each upper arm +/-90 degrees about the x-axis to raise the arms
# into a T shape (quaternions are [x, y, z, w] per poselib.core.rotation3d).
local_rotation[skeleton.index("left_upper_arm")] = quat_mul(
    quat_from_angle_axis(angle=torch.tensor([90.0]), axis=torch.tensor([1.0, 0.0, 0.0]), degree=True),
    local_rotation[skeleton.index("left_upper_arm")]
)
local_rotation[skeleton.index("right_upper_arm")] = quat_mul(
    quat_from_angle_axis(angle=torch.tensor([-90.0]), axis=torch.tensor([1.0, 0.0, 0.0]), degree=True),
    local_rotation[skeleton.index("right_upper_arm")]
)
# Lift the root off the ground; the in-place += mutates zero_pose through
# the aliased root_translation tensor. 0.9 presumably matches the
# humanoid's standing root height — TODO confirm against the MJCF asset.
translation = zero_pose.root_translation
translation += torch.tensor([0, 0, 0.9])
# save and visualize T-pose
zero_pose.to_file("data/amp_humanoid_tpose.npy")
plot_skeleton_state(zero_pose)
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/mjcf_importer.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from poselib.skeleton.skeleton3d import SkeletonTree, SkeletonState
from poselib.visualization.common import plot_skeleton_state
# Load an MJCF (MuJoCo XML) model, build its skeleton tree, and dump the
# zero-rotation pose to .npy for later use; then show it for a sanity check.
mjcf_file = "../../../../assets/mjcf/nv_humanoid.xml"
humanoid_skeleton = SkeletonTree.from_mjcf(mjcf_file)
rest_pose = SkeletonState.zero_pose(humanoid_skeleton)
rest_pose.to_file("data/nv_humanoid.npy")
# visualize the rest pose
plot_skeleton_state(rest_pose)
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/__init__.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
__version__ = "0.0.1"
from .core import *
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/__init__.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
from .tensor_utils import *
from .rotation3d import *
from .backend import Serializable, logger
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/backend/__init__.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
from .abstract import Serializable
from .logger import logger
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/backend/abstract.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from abc import ABCMeta, abstractmethod, abstractclassmethod
from collections import OrderedDict
import json
import numpy as np
import os
# Registry mapping serialized type names to their tensor classes.
TENSOR_CLASS = {}


def register(name):
    """Class decorator that records a tensor class under ``name``.

    The registered class can later be recovered via ``_get_cls(name)``.
    """
    global TENSOR_CLASS

    def _decorator(cls_):
        # remember the class under its serialization name
        TENSOR_CLASS[name] = cls_
        return cls_

    return _decorator
def _get_cls(name):
    """Return the tensor class registered under ``name``.

    Raises KeyError when nothing was registered under that name.
    """
    global TENSOR_CLASS
    registry = TENSOR_CLASS
    return registry[name]
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that understands NumPy scalars and arrays.

    - integer scalars  -> int
    - floating scalars -> float
    - ndarrays         -> {"__ndarray__": nested list, "dtype": ..., "shape": ...}
      (round-tripped by ``json_numpy_obj_hook``)
    """

    def default(self, obj):
        # np.integer / np.floating are the abstract scalar base classes; they
        # cover every concrete width and keep working on NumPy 2.x, where the
        # aliases listed explicitly before (np.int_, np.float_, ...) were
        # removed.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return dict(__ndarray__=obj.tolist(), dtype=str(obj.dtype), shape=obj.shape)
        # anything else: defer to the base class (raises TypeError)
        return json.JSONEncoder.default(self, obj)
def json_numpy_obj_hook(dct):
    """``json.load`` object_hook that rebuilds ndarrays written by NumpyEncoder.

    Dicts without the "__ndarray__" marker pass through unchanged.
    """
    if not (isinstance(dct, dict) and "__ndarray__" in dct):
        return dct
    arr = np.asarray(dct["__ndarray__"], dtype=dct["dtype"])
    return arr.reshape(dct["shape"])
class Serializable:
    """Mixin providing file round-tripping to .npy / .json.

    Subclasses must implement ``to_dict()`` and ``from_dict()``.
    """

    @abstractclassmethod
    def from_dict(cls, dict_repr, *args, **kwargs):
        """Build an instance from its dictionary representation.

        :param dict_repr: the ordered dictionary used to construct the object
        :type dict_repr: OrderedDict
        :param args, kwargs: extra construction arguments forwarded to subclasses
        :type args, kwargs: additional arguments
        """
        pass

    @abstractmethod
    def to_dict(self):
        """Return an ordered-dictionary representation of the object.

        :rtype: OrderedDict
        """
        pass

    @classmethod
    def from_file(cls, path, *args, **kwargs):
        """Load an instance from ``path`` (must end in .json or .npy).

        :param path: path of the file
        :type path: string
        :param args, kwargs: forwarded to ``from_dict()``
        :type args, kwargs: additional arguments
        """
        if path.endswith(".json"):
            with open(path, "r") as fh:
                payload = json.load(fh, object_hook=json_numpy_obj_hook)
        elif path.endswith(".npy"):
            payload = np.load(path, allow_pickle=True).item()
        else:
            assert False, "failed to load {} from {}".format(cls.__name__, path)
        # guard against loading a file written by a different Serializable type
        assert payload["__name__"] == cls.__name__, "the file belongs to {}, not {}".format(
            payload["__name__"], cls.__name__
        )
        return cls.from_dict(payload, *args, **kwargs)

    def to_file(self, path: str) -> None:
        """Write the object to ``path`` (.json or .npy), creating parent dirs.

        :param path: path of the file
        :type path: string
        """
        parent = os.path.dirname(path)
        if parent != "" and not os.path.exists(parent):
            os.makedirs(parent)
        payload = self.to_dict()
        # tag the payload so from_file can verify the owning class
        payload["__name__"] = self.__class__.__name__
        if path.endswith(".json"):
            with open(path, "w") as fh:
                json.dump(payload, fh, cls=NumpyEncoder, indent=4)
        elif path.endswith(".npy"):
            np.save(path, payload)
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/backend/logger.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import logging
# Package-wide logger, configured once on first import.
logger = logging.getLogger("poselib")
logger.setLevel(logging.INFO)

# Attach a stream handler only if none exists yet, so repeated imports
# do not duplicate log output.
if not logger.handlers:
    log_formatter = logging.Formatter(
        fmt="%(asctime)-15s - %(levelname)s - %(module)s - %(message)s"
    )
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(log_formatter)
    logger.addHandler(stream_handler)

logger.info("logger initialized")
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/rotation3d.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from typing import List, Optional
import math
import torch
@torch.jit.script
def quat_mul(a, b):
    """
    Hamilton product of two quaternions stored as (x, y, z, w).
    Shapes must be broadcastable.
    """
    ax, ay, az, aw = a[..., 0], a[..., 1], a[..., 2], a[..., 3]
    bx, by, bz, bw = b[..., 0], b[..., 1], b[..., 2], b[..., 3]
    rx = aw * bx + ax * bw + ay * bz - az * by
    ry = aw * by + ay * bw + az * bx - ax * bz
    rz = aw * bz + az * bw + ax * by - ay * bx
    rw = aw * bw - ax * bx - ay * by - az * bz
    return torch.stack([rx, ry, rz, rw], dim=-1)
@torch.jit.script
def quat_pos(x):
    """
    Flip quaternions so the real (w) component is non-negative.
    q and -q encode the same rotation, so this only canonicalizes.
    """
    neg_mask = (x[..., 3:] < 0).float()
    return (1 - 2 * neg_mask) * x
@torch.jit.script
def quat_abs(x):
    """
    Euclidean norm of the quaternion over its last dimension
    (a unit quaternion, i.e. a valid 3D rotation, has norm 1).
    """
    return (x * x).sum(dim=-1).sqrt()
@torch.jit.script
def quat_unit(x):
    """
    Scale quaternion(s) to unit norm; tiny norms are clamped to avoid
    division by zero. (Norm computation inlined from quat_abs.)
    """
    length = (x * x).sum(dim=-1).sqrt().unsqueeze(-1)
    return x / length.clamp(min=1e-9)
@torch.jit.script
def quat_conjugate(x):
    """
    Quaternion conjugate: negate the imaginary (x, y, z) part, keep w.
    """
    imag = -x[..., :3]
    real = x[..., 3:]
    return torch.cat([imag, real], dim=-1)
@torch.jit.script
def quat_real(x):
    """
    Scalar (w) component of the quaternion, stored last.
    """
    w = x[..., 3]
    return w
@torch.jit.script
def quat_imaginary(x):
    """
    Vector (x, y, z) components of the quaternion.
    """
    xyz = x[..., :3]
    return xyz
@torch.jit.script
def quat_norm_check(x):
    """
    Sanity-check that x holds canonical unit quaternions: norm within
    1e-3 of 1 and non-negative real part. Raises AssertionError otherwise.

    Fix: assert message previously read "is has non-1 norm".
    """
    deviation = abs(x.norm(p=2, dim=-1) - 1)
    assert bool(
        (deviation < 1e-3).all()
    ), "the quaternion has non-1 norm: {}".format(deviation)
    assert bool((x[..., 3] >= 0).all()), "the quaternion has negative real part"
@torch.jit.script
def quat_normalize(q):
    """
    Canonicalize a (possibly unnormalized) quaternion into a valid 3D
    rotation: flip so the real part is non-negative, then scale to unit norm.
    """
    canonical = quat_pos(q)
    return quat_unit(canonical)
@torch.jit.script
def quat_from_xyz(xyz):
    """
    Build a unit quaternion from its imaginary part, recovering the real
    part as w = sqrt(1 - |xyz|^2) per quaternion.

    Fixes vs. the original: the norm is taken over the last dimension only
    (``xyz.norm()`` reduced over ALL dims, breaking batched input), and w
    is the unit-norm solution rather than ``1 - |xyz|``.
    """
    norm_sq = (xyz * xyz).sum(dim=-1, keepdim=True)
    assert bool((norm_sq <= 1.0).all()), "xyz has its norm greater than 1"
    # clamp absorbs tiny negative values from floating-point round-off
    w = (1.0 - norm_sq).clamp(min=0.0).sqrt()
    return torch.cat([xyz, w], dim=-1)
@torch.jit.script
def quat_identity(shape: List[int]):
    """
    Identity rotation (0, 0, 0, 1) tiled to the given batch shape.
    """
    xyz = torch.zeros(shape + [3])
    w = torch.ones(shape + [1])
    identity = torch.cat([xyz, w], dim=-1)
    return quat_normalize(identity)
@torch.jit.script
def quat_from_angle_axis(angle, axis, degree: bool = False):
    """Rotation quaternion for a counter-clockwise rotation of ``angle``
    about ``axis``.

    Interpretable as a_R_b, where frame "b" is frame "a" rotated
    counter-clockwise about the axis.

    :param angle: rotation angle (radians unless ``degree`` is True)
    :type angle: Tensor
    :param axis: rotation axis (normalized internally)
    :type axis: Tensor
    :param degree: set True when ``angle`` is given in degrees
    :type degree: bool, optional, default=False
    """
    if degree:
        angle = angle / 180.0 * math.pi
    half_angle = (angle / 2).unsqueeze(-1)
    unit_axis = axis / (axis.norm(p=2, dim=-1, keepdim=True).clamp(min=1e-9))
    imag = unit_axis * half_angle.sin()
    real = half_angle.cos()
    return quat_normalize(torch.cat([imag, real], dim=-1))
@torch.jit.script
def quat_from_rotation_matrix(m):
    """
    Construct a 3D rotation from a valid 3x3 rotation matrices.
    Reference can be found here:
    http://www.cg.info.hiroshima-cu.ac.jp/~miyazaki/knowledge/teche52.html
    :param m: 3x3 orthogonal rotation matrices.
    :type m: Tensor
    :rtype: Tensor
    """
    # Temporary leading batch dim so the masked in-place updates below work
    # uniformly for a single un-batched matrix; removed again at return.
    m = m.unsqueeze(0)
    diag0 = m[..., 0, 0]
    diag1 = m[..., 1, 1]
    diag2 = m[..., 2, 2]
    # Math stuff.
    # Candidate magnitudes of each quaternion component from the diagonal;
    # clamped at 0 before the square root to absorb numerical noise.
    w = (((diag0 + diag1 + diag2 + 1.0) / 4.0).clamp(0.0, None)) ** 0.5
    x = (((diag0 - diag1 - diag2 + 1.0) / 4.0).clamp(0.0, None)) ** 0.5
    y = (((-diag0 + diag1 - diag2 + 1.0) / 4.0).clamp(0.0, None)) ** 0.5
    z = (((-diag0 - diag1 + diag2 + 1.0) / 4.0).clamp(0.0, None)) ** 0.5
    # Signs are then fixed from off-diagonal terms, branching per element on
    # whichever component has the largest magnitude (numerically stablest).
    # Only modify quaternions where w > x, y, z.
    c0 = (w >= x) & (w >= y) & (w >= z)
    x[c0] *= (m[..., 2, 1][c0] - m[..., 1, 2][c0]).sign()
    y[c0] *= (m[..., 0, 2][c0] - m[..., 2, 0][c0]).sign()
    z[c0] *= (m[..., 1, 0][c0] - m[..., 0, 1][c0]).sign()
    # Only modify quaternions where x > w, y, z
    c1 = (x >= w) & (x >= y) & (x >= z)
    w[c1] *= (m[..., 2, 1][c1] - m[..., 1, 2][c1]).sign()
    y[c1] *= (m[..., 1, 0][c1] + m[..., 0, 1][c1]).sign()
    z[c1] *= (m[..., 0, 2][c1] + m[..., 2, 0][c1]).sign()
    # Only modify quaternions where y > w, x, z.
    c2 = (y >= w) & (y >= x) & (y >= z)
    w[c2] *= (m[..., 0, 2][c2] - m[..., 2, 0][c2]).sign()
    x[c2] *= (m[..., 1, 0][c2] + m[..., 0, 1][c2]).sign()
    z[c2] *= (m[..., 2, 1][c2] + m[..., 1, 2][c2]).sign()
    # Only modify quaternions where z > w, x, y.
    c3 = (z >= w) & (z >= x) & (z >= y)
    w[c3] *= (m[..., 1, 0][c3] - m[..., 0, 1][c3]).sign()
    x[c3] *= (m[..., 2, 0][c3] + m[..., 0, 2][c3]).sign()
    y[c3] *= (m[..., 2, 1][c3] + m[..., 1, 2][c3]).sign()
    # Canonicalize to a unit quaternion and drop the temporary batch dim.
    return quat_normalize(torch.stack([x, y, z, w], dim=-1)).squeeze(0)
@torch.jit.script
def quat_mul_norm(x, y):
    """
    Compose two rotations (quaternion product) and re-canonicalize the
    result to a unit quaternion. Shapes must be broadcastable.
    """
    product = quat_mul(x, y)
    return quat_normalize(product)
@torch.jit.script
def quat_rotate(rot, vec):
    """
    Rotate 3D vector(s) by quaternion(s): imag(q * (vec, 0) * conj(q)).
    """
    vec_quat = torch.cat([vec, torch.zeros_like(vec[..., :1])], dim=-1)
    rotated = quat_mul(quat_mul(rot, vec_quat), quat_conjugate(rot))
    return quat_imaginary(rotated)
@torch.jit.script
def quat_inverse(x):
    """
    Inverse rotation; for unit quaternions this is simply the conjugate.
    """
    inverse_rotation = quat_conjugate(x)
    return inverse_rotation
@torch.jit.script
def quat_identity_like(x):
    """
    Identity quaternion(s) matching the batch shape of ``x``
    (the last dim of x is the 4 quaternion components).
    """
    batch_shape = x.shape[:-1]
    return quat_identity(batch_shape)
@torch.jit.script
def quat_angle_axis(x):
    """
    (angle, axis) representation of the rotation: angle in [0, pi],
    axis normalized to unit length.

    Fix: the axis is now normalized out-of-place. The previous in-place
    ``axis /= ...`` wrote through the view ``x[..., :3]`` and silently
    mutated the caller's quaternion.
    """
    s = 2 * (x[..., 3] ** 2) - 1
    angle = s.clamp(-1, 1).arccos()  # clamp guards fp drift outside [-1, 1]
    axis = x[..., :3]
    axis = axis / axis.norm(p=2, dim=-1, keepdim=True).clamp(min=1e-9)
    return angle, axis
@torch.jit.script
def quat_yaw_rotation(x, z_up: bool = True):
    """
    Keep only the yaw component of the rotation: zero the imaginary parts
    other than the up axis (z when ``z_up``, else y) and renormalize.
    """
    if z_up:
        yaw_only = torch.cat(
            [torch.zeros_like(x[..., 0:2]), x[..., 2:3], x[..., 3:]], dim=-1
        )
    else:
        yaw_only = torch.cat(
            [
                torch.zeros_like(x[..., 0:1]),
                x[..., 1:2],
                torch.zeros_like(x[..., 2:3]),
                x[..., 3:4],
            ],
            dim=-1,
        )
    return quat_normalize(yaw_only)
@torch.jit.script
def transform_from_rotation_translation(
    r: Optional[torch.Tensor] = None, t: Optional[torch.Tensor] = None
):
    """
    Pack a rotation quaternion and a 3D translation into a 7D transform
    (rot[4] | trans[3]). At least one argument must be provided; the
    missing one defaults to identity rotation / zero translation.
    """
    assert r is not None or t is not None, "rotation and translation can't be all None"
    rot = r
    trans = t
    if rot is None:
        assert trans is not None
        rot = quat_identity(list(trans.shape))
    if trans is None:
        trans = torch.zeros(list(rot.shape) + [3])
    return torch.cat([rot, trans], dim=-1)
@torch.jit.script
def transform_identity(shape: List[int]):
    """
    Identity 7D transform (identity rotation, zero translation) of the
    given batch shape.
    """
    rot = quat_identity(shape)
    trans = torch.zeros(shape + [3])
    return transform_from_rotation_translation(rot, trans)
@torch.jit.script
def transform_rotation(x):
    """Rotation quaternion of a 7D transform (first 4 components)."""
    rot = x[..., :4]
    return rot
@torch.jit.script
def transform_translation(x):
    """Translation vector of a 7D transform (components after the quaternion)."""
    trans = x[..., 4:]
    return trans
@torch.jit.script
def transform_inverse(x):
    """
    Inverse of a 7D transform: inverse rotation, and the negated
    translation expressed in the inverted frame.
    """
    rot_inv = quat_inverse(transform_rotation(x))
    trans_inv = quat_rotate(rot_inv, -transform_translation(x))
    return transform_from_rotation_translation(r=rot_inv, t=trans_inv)
@torch.jit.script
def transform_identity_like(x):
    """Identity transform matching the shape of ``x``."""
    shape = x.shape
    return transform_identity(shape)
@torch.jit.script
def transform_mul(x, y):
    """
    Compose two 7D transforms: rotation is the quaternion product;
    translation is y's translation rotated into x's frame plus x's own.
    """
    combined_rot = quat_mul_norm(transform_rotation(x), transform_rotation(y))
    combined_trans = (
        quat_rotate(transform_rotation(x), transform_translation(y))
        + transform_translation(x)
    )
    return transform_from_rotation_translation(r=combined_rot, t=combined_trans)
@torch.jit.script
def transform_apply(rot, vec):
    """
    Apply a 7D transform to 3D point(s): rotate, then translate.
    """
    assert isinstance(vec, torch.Tensor)
    rotated = quat_rotate(transform_rotation(rot), vec)
    return rotated + transform_translation(rot)
@torch.jit.script
def rot_matrix_det(x):
    """
    Determinant of 3x3 matrices via cofactor expansion along the first
    row; the output shape equals the batch shape of ``x``.
    """
    m00, m01, m02 = x[..., 0, 0], x[..., 0, 1], x[..., 0, 2]
    m10, m11, m12 = x[..., 1, 0], x[..., 1, 1], x[..., 1, 2]
    m20, m21, m22 = x[..., 2, 0], x[..., 2, 1], x[..., 2, 2]
    cof0 = m11 * m22 - m12 * m21
    cof1 = m10 * m22 - m12 * m20
    cof2 = m10 * m21 - m11 * m20
    return m00 * cof0 - m01 * cof1 + m02 * cof2
@torch.jit.script
def rot_matrix_integrity_check(x):
    """
    Verify x contains valid rotation matrices: determinant ~1 and
    R @ R^T ~ I. Raises AssertionError otherwise.

    Fixes vs. the original: batch-safe transpose of the last two dims
    (the old ``permute(torch.arange(...), -1, -2)`` call was invalid),
    ``torch.zeros_like`` instead of the nonexistent Tensor method
    ``rtr.zeros_like()``, and an absolute value so negative deviations
    also fail the orthogonality check.
    """
    det = rot_matrix_det(x)
    assert bool(((det - 1).abs() < 1e-3).all()), "the matrix has non-one determinant"
    rtr = x @ x.transpose(-1, -2)
    rtr_gt = torch.zeros_like(rtr)
    rtr_gt[..., 0, 0] = 1
    rtr_gt[..., 1, 1] = 1
    rtr_gt[..., 2, 2] = 1
    assert bool(((rtr - rtr_gt).abs() < 1e-3).all()), "the matrix is not orthogonal"
@torch.jit.script
def rot_matrix_from_quaternion(q):
    """
    Rotation matrix from a quaternion (x, y, z, w) = (qi, qj, qk, qr),
    using the standard (Wikipedia-convention) formula.

    Fix: the bottom row previously stacked R10 instead of R20, producing
    a malformed (non-orthogonal) matrix.
    """
    # Shortcuts for individual elements (using wikipedia's convention)
    qi, qj, qk, qr = q[..., 0], q[..., 1], q[..., 2], q[..., 3]
    # Set individual elements
    R00 = 1.0 - 2.0 * (qj ** 2 + qk ** 2)
    R01 = 2 * (qi * qj - qk * qr)
    R02 = 2 * (qi * qk + qj * qr)
    R10 = 2 * (qi * qj + qk * qr)
    R11 = 1.0 - 2.0 * (qi ** 2 + qk ** 2)
    R12 = 2 * (qj * qk - qi * qr)
    R20 = 2 * (qi * qk - qj * qr)
    R21 = 2 * (qj * qk + qi * qr)
    R22 = 1.0 - 2.0 * (qi ** 2 + qj ** 2)
    R0 = torch.stack([R00, R01, R02], dim=-1)
    R1 = torch.stack([R10, R11, R12], dim=-1)
    R2 = torch.stack([R20, R21, R22], dim=-1)  # was R10 (bug)
    R = torch.stack([R0, R1, R2], dim=-2)
    return R
@torch.jit.script
def euclidean_to_rotation_matrix(x):
    """Top-left 3x3 rotation block of a homogeneous 4x4 transform."""
    rot_block = x[..., :3, :3]
    return rot_block
@torch.jit.script
def euclidean_integrity_check(x):
    """
    Verify that x is a well-formed homogeneous 4x4 transform: the bottom
    row must be [0, 0, 0, 1]. Raises AssertionError otherwise.
    """
    euclidean_to_rotation_matrix(x)  # check 3d-rotation matrix
    assert bool((x[..., 3, :3] == 0).all()), "the last row is illegal"
    assert bool((x[..., 3, 3] == 1).all()), "the last row is illegal"
@torch.jit.script
def euclidean_translation(x):
    """Translation part: the first three rows of the last column."""
    trans = x[..., :3, 3]
    return trans
@torch.jit.script
def euclidean_inverse(x):
    """
    Inverse of homogeneous 4x4 transform(s).

    Fixes vs. the original: ``torch.zeros_like`` replaces the nonexistent
    Tensor method ``x.zeros_like()``; the rotation slot now receives a 3x3
    matrix (a 4-element quaternion was being written into it before); the
    translation goes into column 3 (index 4 was out of bounds for a 4x4
    matrix); and the homogeneous 1 in the bottom-right corner is set.
    """
    inv = torch.zeros_like(x)
    irot = quat_inverse(quat_from_rotation_matrix(euclidean_to_rotation_matrix(x)))
    inv[..., :3, :3] = rot_matrix_from_quaternion(irot)
    inv[..., :3, 3] = quat_rotate(irot, -euclidean_translation(x))
    inv[..., 3, 3] = 1
    return inv
@torch.jit.script
def euclidean_to_transform(transformation_matrix):
    """
    Convert a homogeneous 4x4 matrix into the 7D (quat | trans) format.
    """
    rot_q = quat_from_rotation_matrix(
        m=euclidean_to_rotation_matrix(transformation_matrix)
    )
    trans = euclidean_translation(transformation_matrix)
    return transform_from_rotation_translation(r=rot_q, t=trans)
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/tensor_utils.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
from collections import OrderedDict
from .backend import Serializable
import torch
class TensorUtils(Serializable):
    """Serialization shim that rebuilds torch tensors from dict payloads."""

    @classmethod
    def from_dict(cls, dict_repr, *args, **kwargs):
        """Rebuild a tensor from its dict form.

        :param dict_repr: the ordered dictionary that is used to construct the object
        :type dict_repr: OrderedDict
        :param kwargs: the arguments that need to be passed into from_dict()
        :type kwargs: additional arguments
        """
        arr = dict_repr["arr"].astype(dict_repr["context"]["dtype"])
        return torch.from_numpy(arr)

    def to_dict(self):
        """Serialization to dict is provided by ``tensor_to_dict`` instead.

        :rtype: OrderedDict
        """
        return NotImplemented
def tensor_to_dict(x):
    """Serialize a CPU tensor to {"arr": ndarray, "context": {"dtype": name}}.

    Counterpart of ``TensorUtils.from_dict``.
    """
    array = x.numpy()
    context = {"dtype": array.dtype.name}
    return {"arr": array, "context": context}
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/tests/__init__.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/tests/test_rotation.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from ..rotation3d import *
import numpy as np
import torch
# Smoke-test quaternion normalization and vector rotation on a small batch.
q = torch.from_numpy(np.array([[0, 1, 2, 3], [-2, 3, -1, 5]], dtype=np.float32))
print("q", q)
r = quat_normalize(q)
x = torch.from_numpy(np.array([[1, 0, 0], [0, -1, 0]], dtype=np.float32))
print(r)
print(quat_rotate(r, x))
# Random angle/axis: rotating then applying the inverse rotation must
# return the original vectors.
angle = torch.from_numpy(np.array(np.random.rand() * 10.0, dtype=np.float32))
axis = torch.from_numpy(
    np.array([1, np.random.rand() * 10.0, np.random.rand() * 10.0], dtype=np.float32),
)
print(repr(angle))
print(repr(axis))
rot = quat_from_angle_axis(angle, axis)
x = torch.from_numpy(np.random.rand(5, 6, 3))
y = quat_rotate(quat_inverse(rot), quat_rotate(rot, x))
print(x.numpy())
print(y.numpy())
assert np.allclose(x.numpy(), y.numpy())
# Build a 7D transform from a rotation matrix plus a translation and
# apply it to a point.
m = torch.from_numpy(np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]], dtype=np.float32))
r = quat_from_rotation_matrix(m)
t = torch.from_numpy(np.array([0, 1, 0], dtype=np.float32))
se3 = transform_from_rotation_translation(r=r, t=t)
print(se3)
print(transform_apply(se3, t))
# A transform composed with its inverse should equal the identity
# transform (w = 1, everything else 0) up to numerical error.
rot = quat_from_angle_axis(
    torch.from_numpy(np.array([45, -54], dtype=np.float32)),
    torch.from_numpy(np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32)),
    degree=True,
)
trans = torch.from_numpy(np.array([[1, 1, 0], [1, 1, 0]], dtype=np.float32))
transform = transform_from_rotation_translation(r=rot, t=trans)
t = transform_mul(transform, transform_inverse(transform))
gt = np.zeros((2, 7))
gt[:, 0] = 1.0
print(t.numpy())
print(gt)
# assert np.allclose(t.numpy(), gt)
# Round-trip a homogeneous 4x4 matrix through the 7D transform format.
transform2 = torch.from_numpy(
    np.array(
        [[1, 0, 0, 1], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]], dtype=np.float32
    ),
)
transform2 = euclidean_to_transform(transform2)
print(transform2)
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/__init__.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/backend/__init__.py
================================================
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/backend/fbx/__init__.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/backend/fbx/fbx_backend.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This script reads an fbx file and returns the joint names, parents, and transforms.
NOTE: It requires the Python FBX package to be installed.
"""
import sys
import numpy as np
try:
import fbx
import FbxCommon
except ImportError as e:
print("Error: FBX library failed to load - importing FBX data will not succeed. Message: {}".format(e))
print("FBX tools must be installed from https://help.autodesk.com/view/FBX/2020/ENU/?guid=FBX_Developer_Help_scripting_with_python_fbx_installing_python_fbx_html")
def fbx_to_npy(file_name_in, root_joint_name, fps):
    """
    This function reads in an fbx file, and saves the relevant info to a numpy array

    Fbx files have a series of animation curves, each of which has animations at different
    times. This script assumes that for mocap data, there is only one animation curve that
    contains all the joints. Otherwise it is unclear how to read in the data.

    If this condition isn't met, then the method throws an error

    :param file_name_in: str, file path in. Should be .fbx file
    :param root_joint_name: str or None; name of the joint to use as the skeleton root.
        An empty string or None enables automatic root detection (first joint found
        with an animation curve).
    :param fps: requested sampling rate; any value other than 120 overrides the rate
        derived from the file's own time span (see below).
    :return: tuple (joint_names, parents, local_transforms, fbx_fps)
    :raises RuntimeError: if no joint with an animation curve is found
    """
    # Create the fbx scene object and load the .fbx file
    fbx_sdk_manager, fbx_scene = FbxCommon.InitializeSdkObjects()
    FbxCommon.LoadScene(fbx_sdk_manager, fbx_scene, file_name_in)

    """
    To read in the animation, we must find the root node of the skeleton.
    Unfortunately fbx files can have "scene parents" and other parts of the tree that are
    not joints

    As a crude fix, this reader just takes and finds the first thing which has an
    animation curve attached
    """
    search_root = (root_joint_name is None or root_joint_name == "")

    # Get the root node of the skeleton, which is the child of the scene's root node
    possible_root_nodes = [fbx_scene.GetRootNode()]
    found_root_node = False
    max_key_count = 0
    root_joint = None

    # Breadth-first walk over the whole scene graph.
    while len(possible_root_nodes) > 0:
        joint = possible_root_nodes.pop(0)
        if not search_root:
            # Explicit root requested: match by name.
            if joint.GetName() == root_joint_name:
                root_joint = joint
        try:
            curve = _get_animation_curve(joint, fbx_scene)
        except RuntimeError:
            curve = None
        if curve is not None:
            key_count = curve.KeyGetCount()
            # Track the node whose curve has the most keys.
            if key_count > max_key_count:
                found_root_node = True
                max_key_count = key_count
                # NOTE(review): root_curve is assigned but no longer consumed — its
                # only use is in the commented-out per-key sampling code below.
                root_curve = curve
        if search_root and not root_joint:
            # Auto-detect mode: first node visited becomes the root candidate.
            root_joint = joint
        for child_index in range(joint.GetChildCount()):
            possible_root_nodes.append(joint.GetChild(child_index))
    if not found_root_node:
        raise RuntimeError("No root joint found!! Exiting")

    joint_list, joint_names, parents = _get_skeleton(root_joint)

    """
    Read in the transformation matrices of the animation, taking the scaling into account
    """
    # frame_rate returned by _get_frame_count is unused; the effective fps is
    # re-derived from the time span (or overridden by the caller) below.
    anim_range, frame_count, frame_rate = _get_frame_count(fbx_scene)

    local_transforms = []
    #for frame in range(frame_count):
    time_sec = anim_range.GetStart().GetSecondDouble()
    time_range_sec = anim_range.GetStop().GetSecondDouble() - time_sec
    fbx_fps = frame_count / time_range_sec
    # A requested fps of 120 is treated as "use the file's native rate".
    if fps != 120:
        fbx_fps = fps
    print("FPS: ", fbx_fps)
    # Sample the animation at fixed 1/fbx_fps intervals over the stack's time span.
    while time_sec < anim_range.GetStop().GetSecondDouble():
        fbx_time = fbx.FbxTime()
        fbx_time.SetSecondDouble(time_sec)
        fbx_time = fbx_time.GetFramedTime()
        transforms_current_frame = []

        # Fbx has a unique time object which you need
        #fbx_time = root_curve.KeyGetTime(frame)
        for joint in joint_list:
            arr = np.array(_recursive_to_list(joint.EvaluateLocalTransform(fbx_time)))
            scales = np.array(_recursive_to_list(joint.EvaluateLocalScaling(fbx_time)))
            if not np.allclose(scales[0:3], scales[0]):
                raise ValueError(
                    "Different X, Y and Z scaling. Unsure how this should be handled. "
                    "To solve this, look at this link and try to upgrade the script "
                    "http://help.autodesk.com/view/FBX/2017/ENU/?guid=__files_GUID_10CDD"
                    "63C_79C1_4F2D_BB28_AD2BE65A02ED_htm"
                )
            # Adjust the array for scaling (uniform scale divided out, then the
            # homogeneous corner restored to 1).
            arr /= scales[0]
            arr[3, 3] = 1.0
            transforms_current_frame.append(arr)
        local_transforms.append(transforms_current_frame)
        time_sec += (1.0/fbx_fps)

    local_transforms = np.array(local_transforms)
    print("Frame Count: ", len(local_transforms))
    return joint_names, parents, local_transforms, fbx_fps
def _get_frame_count(fbx_scene):
    """Return (local time span, frame count, frame rate) of the active anim stack."""
    stack_criteria = FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimStack.ClassId)
    num_anim_stacks = fbx_scene.GetSrcObjectCount(stack_criteria)
    # if num_anim_stacks != 1:
    #     raise RuntimeError(
    #         "More than one animation stack was found. "
    #         "This script must be modified to handle this case. Exiting"
    #     )
    # With multiple stacks the second one (index 1) is used; otherwise the only one.
    stack_index = 1 if num_anim_stacks > 1 else 0
    anim_stack = fbx_scene.GetSrcObject(stack_criteria, stack_index)

    anim_range = anim_stack.GetLocalTimeSpan()
    duration = anim_range.GetDuration()
    fps = duration.GetFrameRate(duration.GetGlobalTimeMode())
    frame_count = duration.GetFrameCount(True)

    return anim_range, frame_count, fps
def _get_animation_curve(joint, fbx_scene):
    """Return the animation curve of `joint` with the largest number of keys.

    Looks at the X/Y/Z local-translation curves and the X local-rotation curve
    of the single animation layer of the scene's animation stack.

    :param joint: fbx node whose curves are inspected
    :param fbx_scene: the loaded fbx scene
    :return: the longest curve found, or None if the joint has no animated curve
    :raises RuntimeError: if the stack contains more than one animation layer
    """
    # Build each criteria object once instead of re-constructing it per query.
    stack_criteria = FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimStack.ClassId)
    num_anim_stacks = fbx_scene.GetSrcObjectCount(stack_criteria)
    # With multiple stacks the second one (index 1) is used; otherwise the only one.
    index = 1 if num_anim_stacks > 1 else 0
    anim_stack = fbx_scene.GetSrcObject(stack_criteria, index)

    layer_criteria = FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimLayer.ClassId)
    num_anim_layers = anim_stack.GetSrcObjectCount(layer_criteria)
    if num_anim_layers != 1:
        raise RuntimeError(
            "More than one animation layer was found. "
            "This script must be modified to handle this case. Exiting"
        )
    animation_layer = anim_stack.GetSrcObject(layer_criteria, 0)

    max_curve_key_count = [0]
    longest_curve = None

    def _check_longest_curve(curve):
        # True when `curve` exists and has more keys than any curve seen so far.
        if curve and curve.KeyGetCount() > max_curve_key_count[0]:
            max_curve_key_count[0] = curve.KeyGetCount()
            return True
        return False

    for channel in ["X", "Y", "Z"]:
        curve = joint.LclTranslation.GetCurve(
            animation_layer, channel
        )  # sample curve for translation
        if _check_longest_curve(curve):
            longest_curve = curve

    # NOTE: only the X rotation channel is inspected (matching the original
    # behaviour); the Y/Z rotation curves are assumed to have the same key count.
    curve = joint.LclRotation.GetCurve(animation_layer, "X")
    if _check_longest_curve(curve):
        longest_curve = curve

    return longest_curve
def _get_skeleton(root_joint):
# Do a depth first search of the skeleton to extract all the joints
joint_list = [root_joint]
joint_names = [root_joint.GetName()]
parents = [-1] # -1 means no parent
def append_children(joint, pos):
"""
Depth first search function
:param joint: joint item in the fbx
:param pos: position of current element (for parenting)
:return: Nothing
"""
for child_index in range(joint.GetChildCount()):
child = joint.GetChild(child_index)
joint_list.append(child)
joint_names.append(child.GetName())
parents.append(pos)
append_children(child, len(parents) - 1)
append_children(root_joint, 0)
return joint_list, joint_names, parents
def _recursive_to_list(array):
"""
Takes some iterable that might contain iterables and converts it to a list of lists
[of lists... etc]
Mainly used for converting the strange fbx wrappers for c++ arrays into python lists
:param array: array to be converted
:return: array converted to lists
"""
try:
return float(array)
except TypeError:
return [_recursive_to_list(a) for a in array]
def parse_fbx(file_name_in, root_joint_name, fps):
    """Public entry point: delegate the whole parse to :func:`fbx_to_npy`."""
    return fbx_to_npy(
        file_name_in=file_name_in, root_joint_name=root_joint_name, fps=fps
    )
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/backend/fbx/fbx_read_wrapper.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
"""
Script that reads in fbx files from python
This requires a configs file, which contains the command necessary to switch conda
environments to run the fbx reading script from python
"""
from ....core import logger
import inspect
import os
import numpy as np
from .fbx_backend import parse_fbx
def fbx_to_array(fbx_file_path, root_joint, fps):
    """
    Reads an fbx file to an array.

    :param fbx_file_path: str, file path to fbx
    :param root_joint: name of the root joint; empty/None enables auto-detection
        in the backend
    :param fps: requested sampling rate, forwarded to the backend
    :return: tuple with joint_names, parents, transforms and the fps actually used
    :raises FileNotFoundError: if the path does not point to an existing file
    """
    # Ensure the file path is valid. An `assert` here would be stripped under
    # `python -O`, so raise an explicit, descriptive error instead.
    fbx_file_path = os.path.abspath(fbx_file_path)
    if not os.path.exists(fbx_file_path):
        raise FileNotFoundError("FBX file not found: {}".format(fbx_file_path))

    # Parse FBX file
    joint_names, parents, local_transforms, fbx_fps = parse_fbx(fbx_file_path, root_joint, fps)

    return joint_names, parents, local_transforms, fbx_fps
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/skeleton3d.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import xml.etree.ElementTree as ET
from collections import OrderedDict
from typing import List, Optional, Type, Dict
import numpy as np
import torch
from ..core import *
from .backend.fbx.fbx_read_wrapper import fbx_to_array
import scipy.ndimage.filters as filters
class SkeletonTree(Serializable):
    """
    A skeleton tree gives a complete description of a rigid skeleton. It describes a tree structure
    over a list of nodes with their names indicated by strings. Each edge in the tree has a local
    translation associated with it which describes the distance between the two nodes that it
    connects.

    Basic Usage:
        >>> t = SkeletonTree.from_mjcf(SkeletonTree.__example_mjcf_path__)
        >>> t.node_names
        ['torso', 'front_left_leg', 'aux_1', 'front_left_foot', 'front_right_leg', 'aux_2', 'front_right_foot', 'left_back_leg', 'aux_3', 'left_back_foot', 'right_back_leg', 'aux_4', 'right_back_foot']
        >>> t.parent_indices
        tensor([-1,  0,  1,  2,  0,  4,  5,  0,  7,  8,  0, 10, 11])
        >>> t.parent_of('front_left_leg')
        'torso'
        >>> t.index('front_right_foot')
        6
        >>> t[2]
        'aux_1'
    """

    __example_mjcf_path__ = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "tests/ant.xml"
    )

    def __init__(self, node_names, parent_indices, local_translation):
        """
        :param node_names: a list of names for each tree node
        :type node_names: List[str]
        :param parent_indices: an int32-typed tensor that represents the edge to its parent.\
        -1 represents the root node
        :type parent_indices: Tensor
        :param local_translation: a 3d vector that gives local translation information
        :type local_translation: Tensor
        """
        # All three inputs must describe the same number of nodes.
        ln, lp, ll = len(node_names), len(parent_indices), len(local_translation)
        assert len(set((ln, lp, ll))) == 1
        self._node_names = node_names
        self._parent_indices = parent_indices.long()
        self._local_translation = local_translation
        # Name -> index lookup used by `index()`.
        self._node_indices = {self.node_names[i]: i for i in range(len(self))}

    def __len__(self):
        """ number of nodes in the skeleton tree """
        return len(self.node_names)

    def __iter__(self):
        """ iterator that iterate through the name of each node """
        yield from self.node_names

    def __getitem__(self, item):
        """ get the name of the node given the index """
        return self.node_names[item]

    def __repr__(self):
        return (
            "SkeletonTree(\n    node_names={},\n    parent_indices={},"
            "\n    local_translation={}\n)".format(
                self._indent(repr(self.node_names)),
                self._indent(repr(self.parent_indices)),
                self._indent(repr(self.local_translation)),
            )
        )

    def _indent(self, s):
        return "\n    ".join(s.split("\n"))

    @property
    def node_names(self):
        return self._node_names

    @property
    def parent_indices(self):
        return self._parent_indices

    @property
    def local_translation(self):
        return self._local_translation

    @property
    def num_joints(self):
        """ number of nodes in the skeleton tree """
        return len(self)

    @classmethod
    def from_dict(cls, dict_repr, *args, **kwargs):
        return cls(
            list(map(str, dict_repr["node_names"])),
            TensorUtils.from_dict(dict_repr["parent_indices"], *args, **kwargs),
            TensorUtils.from_dict(dict_repr["local_translation"], *args, **kwargs),
        )

    def to_dict(self):
        return OrderedDict(
            [
                ("node_names", self.node_names),
                ("parent_indices", tensor_to_dict(self.parent_indices)),
                ("local_translation", tensor_to_dict(self.local_translation)),
            ]
        )

    @classmethod
    def from_mjcf(cls, path: str) -> "SkeletonTree":
        """
        Parses a mujoco xml scene description file and returns a Skeleton Tree.
        We use the model attribute at the root as the name of the tree.

        :param path:
        :type path: string
        :return: The skeleton tree constructed from the mjcf file
        :rtype: SkeletonTree
        """
        tree = ET.parse(path)
        xml_doc_root = tree.getroot()
        xml_world_body = xml_doc_root.find("worldbody")
        if xml_world_body is None:
            raise ValueError("MJCF parsed incorrectly please verify it.")
        # assume this is the root
        xml_body_root = xml_world_body.find("body")
        if xml_body_root is None:
            raise ValueError("MJCF parsed incorrectly please verify it.")

        node_names = []
        parent_indices = []
        local_translation = []

        # recursively adding all nodes into the skel_tree
        def _add_xml_node(xml_node, parent_index, node_index):
            node_name = xml_node.attrib.get("name")
            # parse the local translation into float list
            pos = np.fromstring(xml_node.attrib.get("pos"), dtype=float, sep=" ")
            node_names.append(node_name)
            parent_indices.append(parent_index)
            local_translation.append(pos)
            curr_index = node_index
            node_index += 1
            for next_node in xml_node.findall("body"):
                node_index = _add_xml_node(next_node, curr_index, node_index)
            return node_index

        _add_xml_node(xml_body_root, -1, 0)

        return cls(
            node_names,
            torch.from_numpy(np.array(parent_indices, dtype=np.int32)),
            torch.from_numpy(np.array(local_translation, dtype=np.float32)),
        )

    def parent_of(self, node_name):
        """ get the name of the parent of the given node

        :param node_name: the name of the node
        :type node_name: string
        :rtype: string
        """
        return self[int(self.parent_indices[self.index(node_name)].item())]

    def index(self, node_name):
        """ get the index of the node

        :param node_name: the name of the node
        :type node_name: string
        :rtype: int
        """
        return self._node_indices[node_name]

    def drop_nodes_by_names(
        self, node_names: List[str], pairwise_translation=None
    ) -> "SkeletonTree":
        """ Return a new tree with the given nodes removed; children of a dropped
        node are re-attached to their closest kept ancestor.

        :param node_names: names of the nodes to drop (the root cannot be dropped)
        :param pairwise_translation: optional (N, N, 3) tensor of pairwise
            translations; when given, re-attached nodes take their translation
            from it, otherwise the dropped ancestors' translations are summed
        :rtype: SkeletonTree
        """
        new_length = len(self) - len(node_names)
        new_node_names = []
        new_local_translation = torch.zeros(
            new_length, 3, dtype=self.local_translation.dtype
        )
        new_parent_indices = torch.zeros(new_length, dtype=self.parent_indices.dtype)
        parent_indices = self.parent_indices.numpy()
        new_node_indices: dict = {}
        new_node_index = 0
        for node_index in range(len(self)):
            if self[node_index] in node_names:
                continue
            tb_node_index = parent_indices[node_index]
            if tb_node_index != -1:
                local_translation = self.local_translation[node_index, :]
                # Walk up past dropped ancestors, accumulating their translations.
                while tb_node_index != -1 and self[tb_node_index] in node_names:
                    # BUGFIX: use out-of-place addition. `+=` would write through
                    # the view and silently corrupt self.local_translation.
                    local_translation = (
                        local_translation + self.local_translation[tb_node_index, :]
                    )
                    tb_node_index = parent_indices[tb_node_index]
                assert tb_node_index != -1, "the root node cannot be dropped"

                if pairwise_translation is not None:
                    local_translation = pairwise_translation[
                        tb_node_index, node_index, :
                    ]
            else:
                local_translation = self.local_translation[node_index, :]

            new_node_names.append(self[node_index])
            new_local_translation[new_node_index, :] = local_translation
            if tb_node_index == -1:
                new_parent_indices[new_node_index] = -1
            else:
                new_parent_indices[new_node_index] = new_node_indices[
                    self[tb_node_index]
                ]
            new_node_indices[self[node_index]] = new_node_index
            new_node_index += 1

        return SkeletonTree(new_node_names, new_parent_indices, new_local_translation)

    def keep_nodes_by_names(
        self, node_names: List[str], pairwise_translation=None
    ) -> "SkeletonTree":
        """ Return a new tree keeping only the named nodes (root must be kept). """
        nodes_to_drop = list(filter(lambda x: x not in node_names, self))
        return self.drop_nodes_by_names(nodes_to_drop, pairwise_translation)
class SkeletonState(Serializable):
"""
A skeleton state contains all the information needed to describe a static state of a skeleton.
It requires a skeleton tree, local/global rotation at each joint and the root translation.
Example:
>>> t = SkeletonTree.from_mjcf(SkeletonTree.__example_mjcf_path__)
>>> zero_pose = SkeletonState.zero_pose(t)
>>> plot_skeleton_state(zero_pose) # can be imported from `.visualization.common`
    [plot of the ant at zero pose]
>>> local_rotation = zero_pose.local_rotation.clone()
>>> local_rotation[2] = torch.tensor([0, 0, 1, 0])
>>> new_pose = SkeletonState.from_rotation_and_root_translation(
... skeleton_tree=t,
... r=local_rotation,
... t=zero_pose.root_translation,
... is_local=True
... )
>>> new_pose.local_rotation
tensor([[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 1., 0., 0.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.]])
>>> plot_skeleton_state(new_pose) # you should be able to see one of ant's leg is bent
    [plot of the ant with the new pose]
>>> new_pose.global_rotation # the local rotation is propagated to the global rotation at joint #3
tensor([[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 1., 0., 0.],
[0., 1., 0., 0.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.],
[0., 0., 0., 1.]])
Global/Local Representation (cont. from the previous example)
>>> new_pose.is_local
True
>>> new_pose.tensor # this will return the local rotation followed by the root translation
tensor([0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0.,
0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1.,
0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0.,
0.])
    >>> new_pose.tensor.shape # 4 * 13 (joint rotation) + 3 (root translation)
torch.Size([55])
>>> new_pose.global_repr().is_local
False
>>> new_pose.global_repr().tensor # this will return the global rotation followed by the root translation instead
tensor([0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0.,
0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1.,
0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0.,
0.])
    >>> new_pose.global_repr().tensor.shape # 4 * 13 (joint rotation) + 3 (root translation)
torch.Size([55])
"""
    def __init__(self, tensor_backend, skeleton_tree, is_local):
        """
        :param tensor_backend: flat tensor holding all joint rotations followed by the
            root translation (see `.tensor`); it is cloned so the caller's tensor is
            not aliased
        :param skeleton_tree: the skeleton tree this state is defined on
        :type skeleton_tree: SkeletonTree
        :param is_local: whether the stored rotations are parent-relative (local)
            or global
        :type is_local: bool
        """
        self._skeleton_tree = skeleton_tree
        self._is_local = is_local
        self.tensor = tensor_backend.clone()
def __len__(self):
return self.tensor.shape[0]
@property
def rotation(self):
if not hasattr(self, "_rotation"):
self._rotation = self.tensor[..., : self.num_joints * 4].reshape(
*(self.tensor.shape[:-1] + (self.num_joints, 4))
)
return self._rotation
@property
def _local_rotation(self):
if self._is_local:
return self.rotation
else:
return None
@property
def _global_rotation(self):
if not self._is_local:
return self.rotation
else:
return None
    @property
    def is_local(self):
        """ is the rotation represented in local frame?

        :rtype: bool
        """
        return self._is_local
@property
def invariant_property(self):
return {"skeleton_tree": self.skeleton_tree, "is_local": self.is_local}
    @property
    def num_joints(self):
        """ number of joints in the skeleton tree

        :rtype: int
        """
        return self.skeleton_tree.num_joints
    @property
    def skeleton_tree(self):
        """ skeleton tree this state is defined on

        :rtype: SkeletonTree
        """
        return self._skeleton_tree
@property
def root_translation(self):
""" root translation
:rtype: Tensor
"""
if not hasattr(self, "_root_translation"):
self._root_translation = self.tensor[
..., self.num_joints * 4 : self.num_joints * 4 + 3
]
return self._root_translation
    @property
    def global_transformation(self):
        """ global transformation of each joint (transform from joint frame to global frame) """
        # Lazily computed and cached on the first access.
        if not hasattr(self, "_global_transformation"):
            local_transformation = self.local_transformation
            global_transformation = []
            parent_indices = self.skeleton_tree.parent_indices.numpy()
            # global_transformation = local_transformation.identity_like()
            # Accumulate transforms root-to-leaf. Indexing
            # global_transformation[parent_index] below requires that a parent
            # always appears before its children in the node ordering.
            for node_index in range(len(self.skeleton_tree)):
                parent_index = parent_indices[node_index]
                if parent_index == -1:
                    # Root node: its local transform is already global.
                    global_transformation.append(
                        local_transformation[..., node_index, :]
                    )
                else:
                    # Child transform composed onto the parent's global transform.
                    global_transformation.append(
                        transform_mul(
                            global_transformation[parent_index],
                            local_transformation[..., node_index, :],
                        )
                    )
            self._global_transformation = torch.stack(global_transformation, axis=-2)
        return self._global_transformation
@property
def global_rotation(self):
""" global rotation of each joint (rotation matrix to rotate from joint's F.O.R to global
F.O.R) """
if self._global_rotation is None:
if not hasattr(self, "_comp_global_rotation"):
self._comp_global_rotation = transform_rotation(
self.global_transformation
)
return self._comp_global_rotation
else:
return self._global_rotation
@property
def global_translation(self):
""" global translation of each joint """
if not hasattr(self, "_global_translation"):
self._global_translation = transform_translation(self.global_transformation)
return self._global_translation
@property
def global_translation_xy(self):
""" global translation in xy """
trans_xy_data = self.global_translation.zeros_like()
trans_xy_data[..., 0:2] = self.global_translation[..., 0:2]
return trans_xy_data
@property
def global_translation_xz(self):
""" global translation in xz """
trans_xz_data = self.global_translation.zeros_like()
trans_xz_data[..., 0:1] = self.global_translation[..., 0:1]
trans_xz_data[..., 2:3] = self.global_translation[..., 2:3]
return trans_xz_data
    @property
    def local_rotation(self):
        """ the rotation from child frame to parent frame given in the order of child nodes appeared
        in `.skeleton_tree.node_names` """
        # If the state stores local rotations, return them; otherwise derive from
        # the global rotations and cache the result.
        if self._local_rotation is None:
            if not hasattr(self, "_comp_local_rotation"):
                local_rotation = quat_identity_like(self.global_rotation)
                for node_index in range(len(self.skeleton_tree)):
                    parent_index = self.skeleton_tree.parent_indices[node_index]
                    if parent_index == -1:
                        # Root node: local and global rotation coincide.
                        local_rotation[..., node_index, :] = self.global_rotation[
                            ..., node_index, :
                        ]
                    else:
                        # local = inverse(parent_global) * child_global.
                        local_rotation[..., node_index, :] = quat_mul_norm(
                            quat_inverse(self.global_rotation[..., parent_index, :]),
                            self.global_rotation[..., node_index, :],
                        )
                self._comp_local_rotation = local_rotation
            return self._comp_local_rotation
        else:
            return self._local_rotation
@property
def local_transformation(self):
""" local translation + local rotation. It describes the transformation from child frame to
parent frame given in the order of child nodes appeared in `.skeleton_tree.node_names` """
if not hasattr(self, "_local_transformation"):
self._local_transformation = transform_from_rotation_translation(
r=self.local_rotation, t=self.local_translation
)
return self._local_transformation
    @property
    def local_translation(self):
        """ local translation of the skeleton state. It is identical to the local translation in
        `.skeleton_tree.local_translation` except the root translation. The root translation is
        identical to `.root_translation` """
        if not hasattr(self, "_local_translation"):
            # Broadcast the (static) tree translations over any batch dimensions
            # of the backing tensor.
            broadcast_shape = (
                tuple(self.tensor.shape[:-1])
                + (len(self.skeleton_tree),)
                + tuple(self.skeleton_tree.local_translation.shape[-1:])
            )
            # clone() is required: the broadcast view aliases the tree's tensor,
            # and the root slot is overwritten below.
            local_translation = self.skeleton_tree.local_translation.broadcast_to(
                *broadcast_shape
            ).clone()
            local_translation[..., 0, :] = self.root_translation
            self._local_translation = local_translation
        return self._local_translation
# Root Properties
@property
def root_translation_xy(self):
""" root translation on xy """
if not hasattr(self, "_root_translation_xy"):
self._root_translation_xy = self.global_translation_xy[..., 0, :]
return self._root_translation_xy
@property
def global_root_rotation(self):
""" root rotation """
if not hasattr(self, "_global_root_rotation"):
self._global_root_rotation = self.global_rotation[..., 0, :]
return self._global_root_rotation
@property
def global_root_yaw_rotation(self):
""" root yaw rotation """
if not hasattr(self, "_global_root_yaw_rotation"):
self._global_root_yaw_rotation = self.global_root_rotation.yaw_rotation()
return self._global_root_yaw_rotation
# Properties relative to root
@property
def local_translation_to_root(self):
""" The 3D translation from joint frame to the root frame. """
if not hasattr(self, "_local_translation_to_root"):
self._local_translation_to_root = (
self.global_translation - self.root_translation.unsqueeze(-1)
)
return self._local_translation_to_root
    @property
    def local_rotation_to_root(self):
        """ The 3D rotation from joint frame to the root frame. It is equivalent to
        The root_R_world * world_R_node """
        # NOTE(review): this composes quaternions with plain `*` after
        # unsqueeze(-1), unlike quat_mul_norm used elsewhere in this file —
        # confirm the intended semantics against callers before relying on it.
        return (
            quat_inverse(self.global_root_rotation).unsqueeze(-1) * self.global_rotation
        )
def compute_forward_vector(
self,
left_shoulder_index,
right_shoulder_index,
left_hip_index,
right_hip_index,
gaussian_filter_width=20,
):
""" Computes forward vector based on cross product of the up vector with
average of the right->left shoulder and hip vectors """
global_positions = self.global_translation
# Perpendicular to the forward direction.
# Uses the shoulders and hips to find this.
side_direction = (
global_positions[:, left_shoulder_index].numpy()
- global_positions[:, right_shoulder_index].numpy()
+ global_positions[:, left_hip_index].numpy()
- global_positions[:, right_hip_index].numpy()
)
side_direction = (
side_direction
/ np.sqrt((side_direction ** 2).sum(axis=-1))[..., np.newaxis]
)
# Forward direction obtained by crossing with the up direction.
forward_direction = np.cross(side_direction, np.array([[0, 1, 0]]))
# Smooth the forward direction with a Gaussian.
# Axis 0 is the time/frame axis.
forward_direction = filters.gaussian_filter1d(
forward_direction, gaussian_filter_width, axis=0, mode="nearest"
)
forward_direction = (
forward_direction
/ np.sqrt((forward_direction ** 2).sum(axis=-1))[..., np.newaxis]
)
return torch.from_numpy(forward_direction)
@staticmethod
def _to_state_vector(rot, rt):
state_shape = rot.shape[:-2]
vr = rot.reshape(*(state_shape + (-1,)))
vt = rt.broadcast_to(*state_shape + rt.shape[-1:]).reshape(
*(state_shape + (-1,))
)
v = torch.cat([vr, vt], axis=-1)
return v
@classmethod
def from_dict(
cls: Type["SkeletonState"], dict_repr: OrderedDict, *args, **kwargs
) -> "SkeletonState":
rot = TensorUtils.from_dict(dict_repr["rotation"], *args, **kwargs)
rt = TensorUtils.from_dict(dict_repr["root_translation"], *args, **kwargs)
return cls(
SkeletonState._to_state_vector(rot, rt),
SkeletonTree.from_dict(dict_repr["skeleton_tree"], *args, **kwargs),
dict_repr["is_local"],
)
def to_dict(self) -> OrderedDict:
return OrderedDict(
[
("rotation", tensor_to_dict(self.rotation)),
("root_translation", tensor_to_dict(self.root_translation)),
("skeleton_tree", self.skeleton_tree.to_dict()),
("is_local", self.is_local),
]
)
@classmethod
def from_rotation_and_root_translation(cls, skeleton_tree, r, t, is_local=True):
"""
Construct a skeleton state from rotation and root translation
:param skeleton_tree: the skeleton tree
:type skeleton_tree: SkeletonTree
:param r: rotation (either global or local)
:type r: Tensor
:param t: root translation
:type t: Tensor
:param is_local: to indicate that whether the rotation is local or global
:type is_local: bool, optional, default=True
"""
assert (
r.dim() > 0
), "the rotation needs to have at least 1 dimension (dim = {})".format(r.dim)
return cls(
SkeletonState._to_state_vector(r, t),
skeleton_tree=skeleton_tree,
is_local=is_local,
)
@classmethod
def zero_pose(cls, skeleton_tree):
"""
Construct a zero-pose skeleton state from the skeleton tree by assuming that all the local
rotation is 0 and root translation is also 0.
:param skeleton_tree: the skeleton tree as the rigid body
:type skeleton_tree: SkeletonTree
"""
return cls.from_rotation_and_root_translation(
skeleton_tree=skeleton_tree,
r=quat_identity([skeleton_tree.num_joints]),
t=torch.zeros(3, dtype=skeleton_tree.local_translation.dtype),
is_local=True,
)
def local_repr(self):
"""
Convert the skeleton state into local representation. This will only affects the values of
.tensor. If the skeleton state already has `is_local=True`. This method will do nothing.
:rtype: SkeletonState
"""
if self.is_local:
return self
return SkeletonState.from_rotation_and_root_translation(
self.skeleton_tree,
r=self.local_rotation,
t=self.root_translation,
is_local=True,
)
def global_repr(self):
"""
Convert the skeleton state into global representation. This will only affects the values of
.tensor. If the skeleton state already has `is_local=False`. This method will do nothing.
:rtype: SkeletonState
"""
if not self.is_local:
return self
return SkeletonState.from_rotation_and_root_translation(
self.skeleton_tree,
r=self.global_rotation,
t=self.root_translation,
is_local=False,
)
    def _get_pairwise_average_translation(self):
        # Translation of joint j expressed in joint i's frame, for every pair
        # (i, j), averaged over the leading (batch/time) dimensions.
        # Result shape: (num_joints, num_joints, 3).
        global_transform_inv = transform_inverse(self.global_transformation)
        p1 = global_transform_inv.unsqueeze(-2)
        p2 = self.global_transformation.unsqueeze(-3)
        pairwise_translation = (
            transform_translation(transform_mul(p1, p2))
            .reshape(-1, len(self.skeleton_tree), len(self.skeleton_tree), 3)
            .mean(axis=0)
        )
        return pairwise_translation
def _transfer_to(self, new_skeleton_tree: SkeletonTree):
old_indices = list(map(self.skeleton_tree.index, new_skeleton_tree))
return SkeletonState.from_rotation_and_root_translation(
new_skeleton_tree,
r=self.global_rotation[..., old_indices, :],
t=self.root_translation,
is_local=False,
)
def drop_nodes_by_names(
    self, node_names: List[str], estimate_local_translation_from_states: bool = True
) -> "SkeletonState":
    """
    Drop a list of nodes from the skeleton and re-compute the local rotation
    so that the original joint positions are matched as closely as possible.

    :param node_names: names of the nodes to drop
    :type node_names: List of strings
    :param estimate_local_translation_from_states: when True, re-estimate the
        local translations from the averaged pairwise joint translations of
        this state
    :type estimate_local_translation_from_states: boolean
    :rtype: SkeletonState
    """
    pairwise_translation = (
        self._get_pairwise_average_translation()
        if estimate_local_translation_from_states
        else None
    )
    reduced_tree = self.skeleton_tree.drop_nodes_by_names(
        node_names, pairwise_translation
    )
    return self._transfer_to(reduced_tree)
def keep_nodes_by_names(
    self, node_names: List[str], estimate_local_translation_from_states: bool = True
) -> "SkeletonState":
    """
    Keep a list of nodes and drop all other nodes from the skeleton and re-compute the local
    rotation to match the original joint position as much as possible.
    :param node_names: a list of node names that specifies the nodes to KEEP
    :type node_names: List of strings
    :param estimate_local_translation_from_states: the boolean indicator that specifies whether\
    or not to re-estimate the local translation from the states (avg.)
    :type estimate_local_translation_from_states: boolean
    :rtype: SkeletonState
    """
    # Iterating over self yields the node names of the skeleton tree; drop
    # everything that is not in the keep-list.
    return self.drop_nodes_by_names(
        list(filter(lambda x: (x not in node_names), self)),
        estimate_local_translation_from_states,
    )
def _remapped_to(
    self, joint_mapping: Dict[str, str], target_skeleton_tree: SkeletonTree
):
    # Re-express this state's local rotations on the subset of the target
    # tree named by joint_mapping (maps source joint -> target joint).
    joint_mapping_inv = {target: source for source, target in joint_mapping.items()}
    reduced_target_skeleton_tree = target_skeleton_tree.keep_nodes_by_names(
        list(joint_mapping_inv)
    )
    # Sanity check: the mapping, the source tree and the reduced target tree
    # must all contain the same number of joints.
    n_joints = (
        len(joint_mapping),
        len(self.skeleton_tree),
        len(reduced_target_skeleton_tree),
    )
    assert (
        len(set(n_joints)) == 1
    ), "the joint mapping is not consistent with the skeleton trees"
    # For each joint of the reduced target tree, find the index of the
    # corresponding source joint so rotations can be gathered in target order.
    source_indices = list(
        map(
            lambda x: self.skeleton_tree.index(joint_mapping_inv[x]),
            reduced_target_skeleton_tree,
        )
    )
    target_local_rotation = self.local_rotation[..., source_indices, :]
    return SkeletonState.from_rotation_and_root_translation(
        skeleton_tree=reduced_target_skeleton_tree,
        r=target_local_rotation,
        t=self.root_translation,
        is_local=True,
    )
def retarget_to(
    self,
    joint_mapping: Dict[str, str],
    source_tpose_local_rotation,
    source_tpose_root_translation: np.ndarray,
    target_skeleton_tree: SkeletonTree,
    target_tpose_local_rotation,
    target_tpose_root_translation: np.ndarray,
    rotation_to_target_skeleton,
    scale_to_target_skeleton: float,
    z_up: bool = True,
) -> "SkeletonState":
    """
    Retarget the skeleton state to a target skeleton tree. This is a naive retarget
    implementation with rough approximations. The function follows the procedures below.
    Steps:
    1. Drop the joints from the source (self) that do not belong to the joint mapping\
    with an implementation that is similar to "keep_nodes_by_names()" - take a\
    look at the function doc for more details (same for source_tpose)
    2. Rotate the source state and the source tpose by "rotation_to_target_skeleton"\
    to align the source with the target orientation
    3. Extract the root translation and normalize it to match the scale of the target\
    skeleton
    4. Extract the global rotation from source state relative to source tpose and\
    re-apply the relative rotation to the target tpose to construct the global\
    rotation after retargetting
    5. Combine the computed global rotation and the root translation from 3 and 4 to\
    complete the retargeting.
    6. Make feet on the ground (global translation z)
    :param joint_mapping: a dictionary of that maps the joint node from the source skeleton to \
    the target skeleton
    :type joint_mapping: Dict[str, str]
    :param source_tpose_local_rotation: the local rotation of the source skeleton
    :type source_tpose_local_rotation: Tensor
    :param source_tpose_root_translation: the root translation of the source tpose
    :type source_tpose_root_translation: np.ndarray
    :param target_skeleton_tree: the target skeleton tree
    :type target_skeleton_tree: SkeletonTree
    :param target_tpose_local_rotation: the local rotation of the target skeleton
    :type target_tpose_local_rotation: Tensor
    :param target_tpose_root_translation: the root translation of the target tpose
    :type target_tpose_root_translation: Tensor
    :param rotation_to_target_skeleton: the rotation that needs to be applied to the source\
    skeleton to align with the target skeleton. Essentially the rotation is t_R_s, where t is\
    the frame of reference of the target skeleton and s is the frame of reference of the source\
    skeleton
    :type rotation_to_target_skeleton: Tensor
    :param scale_to_target_skeleton: the factor that needs to be multiplied from source\
    skeleton to target skeleton (unit in distance). For example, to go from `cm` to `m`, the \
    factor needs to be 0.01.
    :type scale_to_target_skeleton: float
    :param z_up: NOTE(review) - this parameter is accepted but never used in the\
    body below (step 6 from the doc is not implemented here); confirm before relying on it
    :type z_up: bool, optional
    :rtype: SkeletonState
    """
    # STEP 0: Preprocess
    source_tpose = SkeletonState.from_rotation_and_root_translation(
        skeleton_tree=self.skeleton_tree,
        r=source_tpose_local_rotation,
        t=source_tpose_root_translation,
        is_local=True,
    )
    target_tpose = SkeletonState.from_rotation_and_root_translation(
        skeleton_tree=target_skeleton_tree,
        r=target_tpose_local_rotation,
        t=target_tpose_root_translation,
        is_local=True,
    )
    # STEP 1: Drop the irrelevant joints
    pairwise_translation = self._get_pairwise_average_translation()
    node_names = list(joint_mapping)
    new_skeleton_tree = self.skeleton_tree.keep_nodes_by_names(
        node_names, pairwise_translation
    )
    # TODO: combine the following steps before STEP 3
    source_tpose = source_tpose._transfer_to(new_skeleton_tree)
    source_state = self._transfer_to(new_skeleton_tree)
    source_tpose = source_tpose._remapped_to(joint_mapping, target_skeleton_tree)
    source_state = source_state._remapped_to(joint_mapping, target_skeleton_tree)
    # STEP 2: Rotate the source to align with the target
    # (only the root's local rotation and the root translation need rotating)
    new_local_rotation = source_tpose.local_rotation.clone()
    new_local_rotation[..., 0, :] = quat_mul_norm(
        rotation_to_target_skeleton, source_tpose.local_rotation[..., 0, :]
    )
    source_tpose = SkeletonState.from_rotation_and_root_translation(
        skeleton_tree=source_tpose.skeleton_tree,
        r=new_local_rotation,
        t=quat_rotate(rotation_to_target_skeleton, source_tpose.root_translation),
        is_local=True,
    )
    new_local_rotation = source_state.local_rotation.clone()
    new_local_rotation[..., 0, :] = quat_mul_norm(
        rotation_to_target_skeleton, source_state.local_rotation[..., 0, :]
    )
    source_state = SkeletonState.from_rotation_and_root_translation(
        skeleton_tree=source_state.skeleton_tree,
        r=new_local_rotation,
        t=quat_rotate(rotation_to_target_skeleton, source_state.root_translation),
        is_local=True,
    )
    # STEP 3: Normalize to match the target scale
    root_translation_diff = (
        source_state.root_translation - source_tpose.root_translation
    ) * scale_to_target_skeleton
    # STEP 4: the global rotation from source state relative to source tpose and
    # re-apply to the target
    current_skeleton_tree = source_state.skeleton_tree
    # Start from the source pose's global rotation, then overwrite every joint
    # that exists in the target tpose with the target's global rotation.
    target_tpose_global_rotation = source_state.global_rotation[0, :].clone()
    for current_index, name in enumerate(current_skeleton_tree):
        if name in target_tpose.skeleton_tree:
            target_tpose_global_rotation[
                current_index, :
            ] = target_tpose.global_rotation[
                target_tpose.skeleton_tree.index(name), :
            ]
    global_rotation_diff = quat_mul_norm(
        source_state.global_rotation, quat_inverse(source_tpose.global_rotation)
    )
    new_global_rotation = quat_mul_norm(
        global_rotation_diff, target_tpose_global_rotation
    )
    # STEP 5: Putting 3 and 4 together
    current_skeleton_tree = source_state.skeleton_tree
    shape = source_state.global_rotation.shape[:-1]
    shape = shape[:-1] + target_tpose.global_rotation.shape[-2:-1]
    new_global_rotation_output = quat_identity(shape)
    # Joints of the target tree that were not retargeted inherit the rotation
    # of their closest retargeted ancestor.
    for current_index, name in enumerate(target_skeleton_tree):
        while name not in current_skeleton_tree:
            name = target_skeleton_tree.parent_of(name)
        parent_index = current_skeleton_tree.index(name)
        new_global_rotation_output[:, current_index, :] = new_global_rotation[
            :, parent_index, :
        ]
    source_state = SkeletonState.from_rotation_and_root_translation(
        skeleton_tree=target_skeleton_tree,
        r=new_global_rotation_output,
        t=target_tpose.root_translation + root_translation_diff,
        is_local=False,
    ).local_repr()
    return source_state
def retarget_to_by_tpose(
    self,
    joint_mapping: Dict[str, str],
    source_tpose: "SkeletonState",
    target_tpose: "SkeletonState",
    rotation_to_target_skeleton,
    scale_to_target_skeleton: float,
) -> "SkeletonState":
    """
    Retarget the skeleton state to a target skeleton tree. This is a naive retarget
    implementation with rough approximations. See the method `retarget_to()` for more information
    :param joint_mapping: a dictionary of that maps the joint node from the source skeleton to \
    the target skeleton
    :type joint_mapping: Dict[str, str]
    :param source_tpose: t-pose of the source skeleton
    :type source_tpose: SkeletonState
    :param target_tpose: t-pose of the target skeleton
    :type target_tpose: SkeletonState
    :param rotation_to_target_skeleton: the rotation that needs to be applied to the source\
    skeleton to align with the target skeleton. Essentially the rotation is t_R_s, where t is\
    the frame of reference of the target skeleton and s is the frame of reference of the source\
    skeleton
    :type rotation_to_target_skeleton: Tensor
    :param scale_to_target_skeleton: the factor that needs to be multiplied from source\
    skeleton to target skeleton (unit in distance). For example, to go from `cm` to `m`, the \
    factor needs to be 0.01.
    :type scale_to_target_skeleton: float
    :rtype: SkeletonState
    """
    # Both t-poses must be single (unbatched) states.
    assert (
        len(source_tpose.shape) == 0 and len(target_tpose.shape) == 0
    ), "the retargeting script currently doesn't support vectorized operations"
    # Delegate to retarget_to, unpacking the t-pose states into their parts.
    return self.retarget_to(
        joint_mapping,
        source_tpose.local_rotation,
        source_tpose.root_translation,
        target_tpose.skeleton_tree,
        target_tpose.local_rotation,
        target_tpose.root_translation,
        rotation_to_target_skeleton,
        scale_to_target_skeleton,
    )
class SkeletonMotion(SkeletonState):
    # A SkeletonState sequence augmented with per-joint global linear and
    # angular velocities, plus a frame rate.

    def __init__(self, tensor_backend, skeleton_tree, is_local, fps, *args, **kwargs):
        # Store the frame rate before the base constructor runs so that any
        # base-class logic observing the instance sees a valid _fps.
        self._fps = fps
        super().__init__(tensor_backend, skeleton_tree, is_local, *args, **kwargs)

    def clone(self):
        # Deep-copy the backing tensor; the skeleton tree object is shared.
        return SkeletonMotion(
            self.tensor.clone(), self.skeleton_tree, self._is_local, self._fps
        )

    @property
    def invariant_property(self):
        # Metadata that identifies the motion independently of its tensor data.
        return {
            "skeleton_tree": self.skeleton_tree,
            "is_local": self.is_local,
            "fps": self.fps,
        }
@property
def global_velocity(self):
    """ global velocity """
    # Tensor layout along the last axis:
    # [rotations (num_joints * 4) | root translation (3) |
    #  velocities (num_joints * 3) | angular velocities (num_joints * 3)]
    curr_index = self.num_joints * 4 + 3
    return self.tensor[..., curr_index : curr_index + self.num_joints * 3].reshape(
        *(self.tensor.shape[:-1] + (self.num_joints, 3))
    )
@property
def global_angular_velocity(self):
    """ global angular velocity """
    # Angular velocities start right after the linear velocities:
    # 4 * num_joints (rotations) + 3 (root) + 3 * num_joints (velocities).
    curr_index = self.num_joints * 7 + 3
    return self.tensor[..., curr_index : curr_index + self.num_joints * 3].reshape(
        *(self.tensor.shape[:-1] + (self.num_joints, 3))
    )
@property
def fps(self):
    """ number of frames per second, as provided at construction time """
    return self._fps

@property
def time_delta(self):
    """ time between two adjacent frames (seconds), i.e. 1 / fps """
    return 1.0 / self.fps
@property
def global_root_velocity(self):
    """ global linear velocity of the root joint (joint index 0) """
    return self.global_velocity[..., 0, :]

@property
def global_root_angular_velocity(self):
    """ global angular velocity of the root joint (joint index 0) """
    return self.global_angular_velocity[..., 0, :]
@classmethod
def from_state_vector_and_velocity(
    cls,
    skeleton_tree,
    state_vector,
    global_velocity,
    global_angular_velocity,
    is_local,
    fps,
):
    """Build a skeleton motion from a state vector plus velocity channels.

    The velocities are flattened over the joint dimension and appended to
    the state vector along the last axis.

    :param skeleton_tree: the skeleton tree that the motion is based on
    :type skeleton_tree: SkeletonTree
    :param state_vector: the state vector from the skeleton state by `.tensor`
    :type state_vector: Tensor
    :param global_velocity: the global velocity at each joint
    :type global_velocity: Tensor
    :param global_angular_velocity: the global angular velocity at each joint
    :type global_angular_velocity: Tensor
    :param is_local: if the rotation in the state vector is given in local frame
    :type is_local: boolean
    :param fps: number of frames per second
    :type fps: int
    :rtype: SkeletonMotion
    """
    batch_shape = state_vector.shape[:-1]
    flat_velocity = global_velocity.reshape(*(batch_shape + (-1,)))
    flat_angular = global_angular_velocity.reshape(*(batch_shape + (-1,)))
    motion_vector = torch.cat([state_vector, flat_velocity, flat_angular], axis=-1)
    return cls(
        motion_vector, skeleton_tree=skeleton_tree, is_local=is_local, fps=fps,
    )
@classmethod
def from_skeleton_state(
    cls: Type["SkeletonMotion"], skeleton_state: SkeletonState, fps: int
):
    """
    Construct a skeleton motion from a skeleton state. The velocities are estimated using second
    order gaussian filter along the last axis. The skeleton state must have at least .dim >= 1
    :param skeleton_state: the skeleton state that the motion is based on
    :type skeleton_state: SkeletonState
    :param fps: number of frames per second
    :type fps: int
    :rtype: SkeletonMotion
    """
    # Exact type check (not isinstance): subclasses such as SkeletonMotion
    # are rejected here.
    assert (
        type(skeleton_state) == SkeletonState
    ), "expected type of {}, got {}".format(SkeletonState, type(skeleton_state))
    # Finite-difference velocity estimates from positions/rotations.
    global_velocity = SkeletonMotion._compute_velocity(
        p=skeleton_state.global_translation, time_delta=1 / fps
    )
    global_angular_velocity = SkeletonMotion._compute_angular_velocity(
        r=skeleton_state.global_rotation, time_delta=1 / fps
    )
    return cls.from_state_vector_and_velocity(
        skeleton_tree=skeleton_state.skeleton_tree,
        state_vector=skeleton_state.tensor,
        global_velocity=global_velocity,
        global_angular_velocity=global_angular_velocity,
        is_local=skeleton_state.is_local,
        fps=fps,
    )
@staticmethod
def _to_state_vector(rot, rt, vel, avel):
    """Flatten rotations, root translation and velocities into one vector
    along the last axis (motion-tensor layout)."""
    batch_shape = rot.shape[:-2]
    base_vector = SkeletonState._to_state_vector(rot, rt)
    flat_vel = vel.reshape(*(batch_shape + (-1,)))
    flat_avel = avel.reshape(*(batch_shape + (-1,)))
    return torch.cat([base_vector, flat_vel, flat_avel], axis=-1)
@classmethod
def from_dict(
    cls: Type["SkeletonMotion"], dict_repr: OrderedDict, *args, **kwargs
) -> "SkeletonMotion":
    """
    Construct a skeleton motion from its dictionary representation
    (the inverse of `to_dict`). Extra args/kwargs are forwarded to the
    tensor deserialization.
    :param dict_repr: dictionary produced by `to_dict`
    :type dict_repr: OrderedDict
    :rtype: SkeletonMotion
    """
    rot = TensorUtils.from_dict(dict_repr["rotation"], *args, **kwargs)
    rt = TensorUtils.from_dict(dict_repr["root_translation"], *args, **kwargs)
    vel = TensorUtils.from_dict(dict_repr["global_velocity"], *args, **kwargs)
    avel = TensorUtils.from_dict(
        dict_repr["global_angular_velocity"], *args, **kwargs
    )
    return cls(
        SkeletonMotion._to_state_vector(rot, rt, vel, avel),
        skeleton_tree=SkeletonTree.from_dict(
            dict_repr["skeleton_tree"], *args, **kwargs
        ),
        is_local=dict_repr["is_local"],
        fps=dict_repr["fps"],
    )
def to_dict(self) -> OrderedDict:
    """Serialize the motion (tensors, skeleton tree, metadata) into an
    OrderedDict; the inverse of `from_dict`."""
    serialized = OrderedDict()
    serialized["rotation"] = tensor_to_dict(self.rotation)
    serialized["root_translation"] = tensor_to_dict(self.root_translation)
    serialized["global_velocity"] = tensor_to_dict(self.global_velocity)
    serialized["global_angular_velocity"] = tensor_to_dict(self.global_angular_velocity)
    serialized["skeleton_tree"] = self.skeleton_tree.to_dict()
    serialized["is_local"] = self.is_local
    serialized["fps"] = self.fps
    return serialized
@classmethod
def from_fbx(
    cls: Type["SkeletonMotion"],
    fbx_file_path,
    skeleton_tree=None,
    is_local=True,
    fps=120,
    root_joint="",
    root_trans_index=0,
    *args,
    **kwargs,
) -> "SkeletonMotion":
    """
    Construct a skeleton motion from a fbx file (TODO - generalize this). If the skeleton tree
    is not given, it will use the first frame of the mocap to construct the skeleton tree.
    :param fbx_file_path: the path of the fbx file
    :type fbx_file_path: string
    :param skeleton_tree: the optional skeleton tree that the rotation will be applied to
    :type skeleton_tree: SkeletonTree, optional
    :param is_local: the state vector uses local or global rotation as the representation
    :type is_local: bool, optional, default=True
    :param fps: FPS of the FBX animation
    :type fps: int, optional, default=120
    :param root_joint: the name of the root joint for the skeleton
    :type root_joint: string, optional, default="" or the first node in the FBX scene with animation data
    :param root_trans_index: index of joint to extract root transform from
    :type root_trans_index: int, optional, default=0 or the root joint in the parsed skeleton
    :rtype: SkeletonMotion
    """
    # fps may be overridden by the value recorded in the FBX file.
    joint_names, joint_parents, transforms, fps = fbx_to_array(
        fbx_file_path, root_joint, fps
    )
    # swap the last two axis to match the convention
    local_transform = euclidean_to_transform(
        transformation_matrix=torch.from_numpy(
            np.swapaxes(np.array(transforms), -1, -2),
        ).float()
    )
    local_rotation = transform_rotation(local_transform)
    root_translation = transform_translation(local_transform)[..., root_trans_index, :]
    joint_parents = torch.from_numpy(np.array(joint_parents)).int()
    if skeleton_tree is None:
        # Build the skeleton tree from the first frame's local translations.
        local_translation = transform_translation(local_transform).reshape(
            -1, len(joint_parents), 3
        )[0]
        skeleton_tree = SkeletonTree(joint_names, joint_parents, local_translation)
    skeleton_state = SkeletonState.from_rotation_and_root_translation(
        skeleton_tree, r=local_rotation, t=root_translation, is_local=True
    )
    if not is_local:
        skeleton_state = skeleton_state.global_repr()
    return cls.from_skeleton_state(
        skeleton_state=skeleton_state, fps=fps
    )
@staticmethod
def _compute_velocity(p, time_delta, guassian_filter=True):
    """Estimate per-frame linear velocity by finite differences.

    :param p: global joint positions; the time axis is assumed to be axis -3
        (i.e. shape (..., num_frames, num_joints, 3)) — TODO confirm with callers
    :param p: Tensor
    :param time_delta: time between two adjacent frames (1 / fps)
    :type time_delta: float
    :param guassian_filter: when True (default), smooth the finite-difference
        estimate with a second-order Gaussian filter along the time axis.
        BUG FIX: this flag was previously ignored and the filter was always
        applied; the default behavior is unchanged.
    :rtype: Tensor
    """
    velocity = np.gradient(p.numpy(), axis=-3) / time_delta
    if guassian_filter:
        velocity = filters.gaussian_filter1d(velocity, 2, axis=-3, mode="nearest")
    return torch.from_numpy(velocity)
@staticmethod
def _compute_angular_velocity(r, time_delta: float, guassian_filter=True):
    """Estimate per-frame angular velocity from global rotations.

    The relative rotation between consecutive frames is converted to an
    axis-angle vector and divided by ``time_delta``.

    :param r: global joint rotations; the time axis is assumed to be axis -3
        (i.e. shape (..., num_frames, num_joints, 4)) — TODO confirm with callers
    :param time_delta: time between two adjacent frames (1 / fps)
    :param guassian_filter: when True (default), smooth the estimate with a
        second-order Gaussian filter along the time axis. BUG FIX: this flag
        was previously ignored and the filter was always applied; the default
        behavior is unchanged.
    :rtype: Tensor
    """
    # assume the second last dimension is the time axis
    # Quaternion difference between frame t+1 and frame t; the final frame
    # keeps the identity difference (zero angular velocity).
    diff_quat_data = quat_identity_like(r)
    diff_quat_data[..., :-1, :, :] = quat_mul_norm(
        r[..., 1:, :, :], quat_inverse(r[..., :-1, :, :])
    )
    diff_angle, diff_axis = quat_angle_axis(diff_quat_data)
    angular_velocity = diff_axis * diff_angle.unsqueeze(-1) / time_delta
    if guassian_filter:
        angular_velocity = torch.from_numpy(
            filters.gaussian_filter1d(
                angular_velocity.numpy(), 2, axis=-3, mode="nearest"
            ),
        )
    return angular_velocity
def crop(self, start: int, end: int, fps: Optional[int] = None):
    """
    Crop the motion along its time axis, optionally resampling it. This is
    equivalent to slicing with [start:end:skip_every] where
    skip_every = old_fps // new_fps. The new fps must be a factor of the
    original fps.
    :param start: the beginning frame index
    :type start: int
    :param end: the ending frame index (exclusive)
    :type end: int
    :param fps: number of frames per second in the output (if not given the original fps will be used)
    :type fps: int, optional
    :rtype: SkeletonMotion
    """
    old_fps = int(self.fps)
    if fps is None:
        new_fps = old_fps
    else:
        new_fps = int(fps)
        assert old_fps % new_fps == 0, (
            "the resampling doesn't support fps with non-integer division "
            "from the original fps: {} => {}".format(old_fps, fps)
        )
    skip_every = old_fps // new_fps
    return SkeletonMotion.from_skeleton_state(
        SkeletonState.from_rotation_and_root_translation(
            skeleton_tree=self.skeleton_tree,
            t=self.root_translation[start:end:skip_every],
            r=self.local_rotation[start:end:skip_every],
            is_local=True
        ),
        # BUG FIX: previously this always passed self.fps, so a resampled
        # motion carried the wrong (original) frame rate.
        fps=new_fps
    )
def retarget_to(
    self,
    joint_mapping: Dict[str, str],
    source_tpose_local_rotation,
    source_tpose_root_translation: np.ndarray,
    target_skeleton_tree: "SkeletonTree",
    target_tpose_local_rotation,
    target_tpose_root_translation: np.ndarray,
    rotation_to_target_skeleton,
    scale_to_target_skeleton: float,
    z_up: bool = True,
) -> "SkeletonMotion":
    """
    Same as the one in :class:`SkeletonState`. This method discards all velocity information before
    retargeting and re-estimate the velocity after the retargeting. The same fps is used in the
    new retargetted motion.
    :param joint_mapping: a dictionary of that maps the joint node from the source skeleton to \
    the target skeleton
    :type joint_mapping: Dict[str, str]
    :param source_tpose_local_rotation: the local rotation of the source skeleton
    :type source_tpose_local_rotation: Tensor
    :param source_tpose_root_translation: the root translation of the source tpose
    :type source_tpose_root_translation: np.ndarray
    :param target_skeleton_tree: the target skeleton tree
    :type target_skeleton_tree: SkeletonTree
    :param target_tpose_local_rotation: the local rotation of the target skeleton
    :type target_tpose_local_rotation: Tensor
    :param target_tpose_root_translation: the root translation of the target tpose
    :type target_tpose_root_translation: Tensor
    :param rotation_to_target_skeleton: the rotation that needs to be applied to the source\
    skeleton to align with the target skeleton. Essentially the rotation is t_R_s, where t is\
    the frame of reference of the target skeleton and s is the frame of reference of the source\
    skeleton
    :type rotation_to_target_skeleton: Tensor
    :param scale_to_target_skeleton: the factor that needs to be multiplied from source\
    skeleton to target skeleton (unit in distance). For example, to go from `cm` to `m`, the \
    factor needs to be 0.01.
    :type scale_to_target_skeleton: float
    :param z_up: forwarded unchanged to SkeletonState.retarget_to
    :type z_up: bool, optional
    :rtype: SkeletonMotion
    """
    # Retarget as a plain SkeletonState, then rebuild the motion so the
    # velocities are re-estimated on the retargeted poses.
    return SkeletonMotion.from_skeleton_state(
        super().retarget_to(
            joint_mapping,
            source_tpose_local_rotation,
            source_tpose_root_translation,
            target_skeleton_tree,
            target_tpose_local_rotation,
            target_tpose_root_translation,
            rotation_to_target_skeleton,
            scale_to_target_skeleton,
            z_up,
        ),
        self.fps,
    )
def retarget_to_by_tpose(
    self,
    joint_mapping: Dict[str, str],
    source_tpose: "SkeletonState",
    target_tpose: "SkeletonState",
    rotation_to_target_skeleton,
    scale_to_target_skeleton: float,
    z_up: bool = True,
) -> "SkeletonMotion":
    """
    Same as the one in :class:`SkeletonState`. This method discards all velocity information before
    retargeting and re-estimate the velocity after the retargeting. The same fps is used in the
    new retargetted motion.
    :param joint_mapping: a dictionary of that maps the joint node from the source skeleton to \
    the target skeleton
    :type joint_mapping: Dict[str, str]
    :param source_tpose: t-pose of the source skeleton
    :type source_tpose: SkeletonState
    :param target_tpose: t-pose of the target skeleton
    :type target_tpose: SkeletonState
    :param rotation_to_target_skeleton: the rotation that needs to be applied to the source\
    skeleton to align with the target skeleton. Essentially the rotation is t_R_s, where t is\
    the frame of reference of the target skeleton and s is the frame of reference of the source\
    skeleton
    :type rotation_to_target_skeleton: Tensor
    :param scale_to_target_skeleton: the factor that needs to be multiplied from source\
    skeleton to target skeleton (unit in distance). For example, to go from `cm` to `m`, the \
    factor needs to be 0.01.
    :type scale_to_target_skeleton: float
    :param z_up: forwarded unchanged to retarget_to
    :type z_up: bool, optional
    :rtype: SkeletonMotion
    """
    # Unpack the t-pose states and delegate to retarget_to.
    return self.retarget_to(
        joint_mapping,
        source_tpose.local_rotation,
        source_tpose.root_translation,
        target_tpose.skeleton_tree,
        target_tpose.local_rotation,
        target_tpose.root_translation,
        rotation_to_target_skeleton,
        scale_to_target_skeleton,
        z_up,
    )
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/__init__.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/common.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
from ..core import logger
from .plt_plotter import Matplotlib3DPlotter
from .skeleton_plotter_tasks import Draw3DSkeletonMotion, Draw3DSkeletonState
def plot_skeleton_state(skeleton_state, task_name=""):
    """
    Visualize a single skeleton state in a blocking matplotlib window.
    :param skeleton_state: the state to draw
    :param task_name: optional name shown for the plotting task
    :type skeleton_state: SkeletonState
    :type task_name: string, optional
    """
    logger.info("plotting {}".format(task_name))
    draw_task = Draw3DSkeletonState(task_name=task_name, skeleton_state=skeleton_state)
    Matplotlib3DPlotter(draw_task).show()
def plot_skeleton_states(skeleton_state, skip_n=1, task_name=""):
    """
    Visualize a sequence of skeleton states, drawing every ``skip_n``-th frame.
    The skeleton state must be one-dimensional (a sequence of states).
    :param skeleton_state: the state sequence to draw
    :param skip_n: draw only frames whose index is a multiple of this value
    :param task_name: optional name shown for the plotting task
    :type skeleton_state: SkeletonState
    :type task_name: string, optional
    """
    logger.info("plotting {} motion".format(task_name))
    assert len(skeleton_state.shape) == 1, "the state must have only one dimension"
    draw_task = Draw3DSkeletonState(task_name=task_name, skeleton_state=skeleton_state[0])
    plotter = Matplotlib3DPlotter(draw_task)
    for frame_id in range(skeleton_state.shape[0]):
        if frame_id % skip_n == 0:
            draw_task.update(skeleton_state[frame_id])
            plotter.update()
    plotter.show()
def plot_skeleton_motion(skeleton_motion, skip_n=1, task_name=""):
    """
    Visualize a skeleton motion along its first dimension, drawing every
    ``skip_n``-th frame.
    :param skeleton_motion: the motion to draw
    :param skip_n: draw only frames whose index is a multiple of this value
    :param task_name: optional name shown for the plotting task
    :type skeleton_motion: SkeletonMotion
    :type task_name: string, optional
    """
    logger.info("plotting {} motion".format(task_name))
    draw_task = Draw3DSkeletonMotion(
        task_name=task_name, skeleton_motion=skeleton_motion, frame_index=0
    )
    plotter = Matplotlib3DPlotter(draw_task)
    for frame_id in range(len(skeleton_motion)):
        if frame_id % skip_n == 0:
            draw_task.update(frame_id)
            plotter.update()
    plotter.show()
def plot_skeleton_motion_interactive_base(skeleton_motion, task_name=""):
    """Generator driving an interactive matplotlib playback of a motion.

    Yields the current PlotParams once per rendered frame so callers can
    inspect or synchronize playback state across multiple plots (see
    plot_skeleton_motion_interactive_multiple).

    :param skeleton_motion: the motion to visualize
    :type skeleton_motion: SkeletonMotion
    :param task_name: optional name for the plotting task
    :type task_name: string, optional
    """

    class PlotParams:
        # Mutable playback state shared between the key handler and the loop.
        def __init__(self, total_num_frames):
            self.current_frame = 0
            self.playing = False
            self.looping = False
            self.confirmed = False
            self.playback_speed = 4
            self.total_num_frames = total_num_frames

        def sync(self, other):
            # Mirror another plot's playback state onto this one.
            self.current_frame = other.current_frame
            self.playing = other.playing
            # BUG FIX: previously assigned other.current_frame here, so the
            # looping flag was silently overwritten with a frame index.
            self.looping = other.looping
            self.confirmed = other.confirmed
            self.playback_speed = other.playback_speed
            self.total_num_frames = other.total_num_frames

    task = Draw3DSkeletonMotion(
        task_name=task_name, skeleton_motion=skeleton_motion, frame_index=0
    )
    plotter = Matplotlib3DPlotter(task)
    plot_params = PlotParams(total_num_frames=len(skeleton_motion))
    print("Entered interactive plot - press 'n' to quit, 'h' for a list of commands")

    def press(event):
        if event.key == "x":
            plot_params.playing = not plot_params.playing
        elif event.key == "z":
            plot_params.current_frame = plot_params.current_frame - 1
        elif event.key == "c":
            plot_params.current_frame = plot_params.current_frame + 1
        elif event.key == "a":
            plot_params.current_frame = plot_params.current_frame - 20
        elif event.key == "d":
            plot_params.current_frame = plot_params.current_frame + 20
        elif event.key == "w":
            plot_params.looping = not plot_params.looping
            print("Looping: {}".format(plot_params.looping))
        elif event.key == "v":
            plot_params.playback_speed *= 2
            print("playback speed: {}".format(plot_params.playback_speed))
        elif event.key == "b":
            if plot_params.playback_speed != 1:
                plot_params.playback_speed //= 2
            print("playback speed: {}".format(plot_params.playback_speed))
        elif event.key == "n":
            plot_params.confirmed = True
        elif event.key == "h":
            # NOTE(review): "stty size" is POSIX-only; this help screen will
            # fail on platforms without a stty binary.
            rows, columns = os.popen("stty size", "r").read().split()
            columns = int(columns)
            print("=" * columns)
            print("x: play/pause")
            print("z: previous frame")
            print("c: next frame")
            # BUG FIX: the 'a'/'d' handlers above jump 20 frames, not 10.
            print("a: jump 20 frames back")
            print("d: jump 20 frames forward")
            print("w: looping/non-looping")
            print("v: double speed (this can be applied multiple times)")
            print("b: half speed (this can be applied multiple times)")
            print("n: quit")
            print("h: help")
            print("=" * columns)
        print(
            'current frame index: {}/{} (press "n" to quit)'.format(
                plot_params.current_frame, plot_params.total_num_frames - 1
            )
        )

    plotter.fig.canvas.mpl_connect("key_press_event", press)
    while True:
        reset_trail = False
        if plot_params.confirmed:
            break
        if plot_params.playing:
            plot_params.current_frame += plot_params.playback_speed
        # Clamp or wrap the frame index depending on the looping flag.
        if plot_params.current_frame >= plot_params.total_num_frames:
            if plot_params.looping:
                plot_params.current_frame %= plot_params.total_num_frames
                reset_trail = True
            else:
                plot_params.current_frame = plot_params.total_num_frames - 1
        if plot_params.current_frame < 0:
            if plot_params.looping:
                plot_params.current_frame %= plot_params.total_num_frames
                reset_trail = True
            else:
                plot_params.current_frame = 0
        yield plot_params
        task.update(plot_params.current_frame, reset_trail)
        plotter.update()
def plot_skeleton_motion_interactive(skeleton_motion, task_name=""):
    """
    Interactively visualize a skeleton motion along its first dimension,
    consuming the interactive playback generator to completion.
    :param skeleton_motion: the motion to visualize
    :param task_name: optional name for the plotting task
    :type skeleton_motion: SkeletonMotion
    :type task_name: string, optional
    """
    playback = plot_skeleton_motion_interactive_base(skeleton_motion, task_name)
    for _ in playback:
        pass
def plot_skeleton_motion_interactive_multiple(*callables, sync=True):
    """Drive several interactive playback generators in lockstep.

    Each element of ``callables`` is an iterable yielding PlotParams-like
    objects (e.g. generators from plot_skeleton_motion_interactive_base).
    When ``sync`` is True, each plot's parameters are synchronized from the
    preceding plot in the chain on every frame.
    """
    for params_tuple in zip(*callables):
        if not sync:
            continue
        for leader, follower in zip(params_tuple[:-1], params_tuple[1:]):
            follower.sync(leader)
# def plot_skeleton_motion_interactive_multiple_same(skeleton_motions, task_name=""):
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/core.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
The base abstract classes for plotter and the plotting tasks. It describes how the plotter
deals with the tasks in the general cases
"""
from typing import List
class BasePlotterTask(object):
    """Abstract plotting task identified by a unique name and a type string.

    The type string is the dispatch key a plotter uses to pick the
    create/update callable that handles this task's primitives.
    """

    _task_name: str  # unique identifier of this task
    _task_type: str  # dispatch key used by the plotter

    def __init__(self, task_name: str, task_type: str) -> None:
        self._task_name = task_name
        self._task_type = task_type

    @property
    def task_name(self):
        return self._task_name

    @property
    def task_type(self):
        return self._task_type

    def get_scoped_name(self, name):
        # Namespace sub-task names under this task's own name.
        return "{}/{}".format(self._task_name, name)

    def __iter__(self):
        """Subclasses must yield their task primitives here."""
        raise NotImplementedError
class BasePlotterTasks(object):
    """Aggregate several plotter tasks into one flat stream of primitives."""

    def __init__(self, tasks) -> None:
        self._tasks = tasks

    def __iter__(self):
        # Flatten: each member task yields its own primitives in turn.
        for member in self._tasks:
            for primitive in member:
                yield primitive
class BasePlotter(object):
    """Abstract plotter that owns a flat list of task primitives.

    Children implement ``_create_impl``/``_update_impl`` to map each
    primitive onto concrete drawing objects.
    """

    _task_primitives: List[BasePlotterTask]  # every primitive seen so far

    def __init__(self, task: BasePlotterTask) -> None:
        self._task_primitives = []
        self.create(task)

    @property
    def task_primitives(self):
        return self._task_primitives

    def create(self, task: BasePlotterTask) -> None:
        """Expand *task* into primitives, remember them, and create artists."""
        fresh = list(task)  # iterating a task yields its primitives
        self._task_primitives.extend(fresh)
        self._create_impl(fresh)

    def update(self) -> None:
        """Re-render every primitive registered so far."""
        self._update_impl(self._task_primitives)

    def _create_impl(self, task_list: List[BasePlotterTask]) -> None:
        raise NotImplementedError

    def _update_impl(self, task_list: List[BasePlotterTask]) -> None:
        raise NotImplementedError
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/plt_plotter.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
The matplotlib plotter implementation for all the primitive tasks (in our case: lines and
dots)
"""
from typing import Any, Callable, Dict, List
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as p3
import numpy as np
from .core import BasePlotter, BasePlotterTask
class Matplotlib2DPlotter(BasePlotter):
    """Matplotlib implementation of BasePlotter for 2D primitives.

    Dispatches on each primitive's ``task_type`` ("Draw2DLines",
    "Draw2DDots", "Draw2DTrail") to create/update matplotlib artists,
    caching them by task name, and refits the axis limits after every draw.
    """

    _fig: plt.figure  # plt figure
    _ax: plt.axis  # plt axis
    # stores artist objects for each task (task name as the key)
    _artist_cache: Dict[str, Any]
    # callables for each task primitives
    _create_impl_callables: Dict[str, Callable]
    _update_impl_callables: Dict[str, Callable]

    def __init__(self, task: "BasePlotterTask") -> None:
        fig, ax = plt.subplots()
        self._fig = fig
        self._ax = ax
        self._artist_cache = {}
        # task_type -> artist factory; an unknown type raises KeyError in
        # _create_impl.
        self._create_impl_callables = {
            "Draw2DLines": self._lines_create_impl,
            "Draw2DDots": self._dots_create_impl,
            "Draw2DTrail": self._trail_create_impl,
        }
        # task_type -> artist updater, mirroring the factories above.
        self._update_impl_callables = {
            "Draw2DLines": self._lines_update_impl,
            "Draw2DDots": self._dots_update_impl,
            "Draw2DTrail": self._trail_update_impl,
        }
        self._init_lim()
        # BasePlotter.__init__ expands the task into primitives and draws them.
        super().__init__(task)

    @property
    def ax(self):
        return self._ax

    @property
    def fig(self):
        return self._fig

    def show(self):
        # Blocks until the window is closed (interactive backends).
        plt.show()

    def _min(self, x, y):
        # None-tolerant min: a missing bound is replaced by the other value.
        if x is None:
            return y
        if y is None:
            return x
        return min(x, y)

    def _max(self, x, y):
        # None-tolerant max, mirroring _min.
        if x is None:
            return y
        if y is None:
            return x
        return max(x, y)

    def _init_lim(self):
        # Reset the running bounding box accumulated by _update_lim.
        self._curr_x_min = None
        self._curr_y_min = None
        self._curr_x_max = None
        self._curr_y_max = None

    def _update_lim(self, xs, ys):
        # Grow the pending bounding box to cover the given coordinates.
        self._curr_x_min = self._min(np.min(xs), self._curr_x_min)
        self._curr_y_min = self._min(np.min(ys), self._curr_y_min)
        self._curr_x_max = self._max(np.max(xs), self._curr_x_max)
        self._curr_y_max = self._max(np.max(ys), self._curr_y_max)

    def _set_lim(self):
        # Apply the pending bounding box (only if fully defined), then reset
        # it so the next draw starts accumulating from scratch.
        if not (
            self._curr_x_min is None
            or self._curr_x_max is None
            or self._curr_y_min is None
            or self._curr_y_max is None
        ):
            self._ax.set_xlim(self._curr_x_min, self._curr_x_max)
            self._ax.set_ylim(self._curr_y_min, self._curr_y_max)
        self._init_lim()

    @staticmethod
    def _lines_extract_xy_impl(index, lines_task):
        # Split segment `index` of the lines task into (xs, ys) arrays.
        return lines_task[index, :, 0], lines_task[index, :, 1]

    @staticmethod
    def _trail_extract_xy_impl(index, trail_task):
        # Consecutive point pair (index, index+1) of the trail as (xs, ys).
        return (trail_task[index : index + 2, 0], trail_task[index : index + 2, 1])

    def _lines_create_impl(self, lines_task):
        # One Line2D artist per segment, cached under the task name.
        color = lines_task.color
        self._artist_cache[lines_task.task_name] = [
            self._ax.plot(
                *Matplotlib2DPlotter._lines_extract_xy_impl(i, lines_task),
                color=color,
                linewidth=lines_task.line_width,
                alpha=lines_task.alpha
            )[0]
            for i in range(len(lines_task))
        ]

    def _lines_update_impl(self, lines_task):
        lines_artists = self._artist_cache[lines_task.task_name]
        # assumes the task still has the same number of segments as when the
        # artists were created — TODO confirm
        for i in range(len(lines_task)):
            artist = lines_artists[i]
            xs, ys = Matplotlib2DPlotter._lines_extract_xy_impl(i, lines_task)
            artist.set_data(xs, ys)
            if lines_task.influence_lim:
                self._update_lim(xs, ys)

    def _dots_create_impl(self, dots_task):
        # All dots share a single marker-only Line2D artist.
        color = dots_task.color
        self._artist_cache[dots_task.task_name] = self._ax.plot(
            dots_task[:, 0],
            dots_task[:, 1],
            c=color,
            linestyle="",
            marker=".",
            markersize=dots_task.marker_size,
            alpha=dots_task.alpha,
        )[0]

    def _dots_update_impl(self, dots_task):
        dots_artist = self._artist_cache[dots_task.task_name]
        dots_artist.set_data(dots_task[:, 0], dots_task[:, 1])
        if dots_task.influence_lim:
            self._update_lim(dots_task[:, 0], dots_task[:, 1])

    def _trail_create_impl(self, trail_task):
        color = trail_task.color
        trail_length = len(trail_task) - 1
        # One artist per consecutive point pair; alpha fades along the trail.
        # NOTE(review): divides by (trail_length - 1), so a 2-point trail
        # raises ZeroDivisionError and the final segment always gets alpha 0
        # — confirm intended.
        self._artist_cache[trail_task.task_name] = [
            self._ax.plot(
                *Matplotlib2DPlotter._trail_extract_xy_impl(i, trail_task),
                color=trail_task.color,
                linewidth=trail_task.line_width,
                alpha=trail_task.alpha * (1.0 - i / (trail_length - 1))
            )[0]
            for i in range(trail_length)
        ]

    def _trail_update_impl(self, trail_task):
        trails_artists = self._artist_cache[trail_task.task_name]
        for i in range(len(trail_task) - 1):
            artist = trails_artists[i]
            xs, ys = Matplotlib2DPlotter._trail_extract_xy_impl(i, trail_task)
            artist.set_data(xs, ys)
            if trail_task.influence_lim:
                self._update_lim(xs, ys)

    def _create_impl(self, task_list):
        # BasePlotter hook: build artists for new primitives, then render.
        for task in task_list:
            self._create_impl_callables[task.task_type](task)
        self._draw()

    def _update_impl(self, task_list):
        # BasePlotter hook: push fresh data into cached artists, then render.
        for task in task_list:
            self._update_impl_callables[task.task_type](task)
        self._draw()

    def _set_aspect_equal_2d(self, zero_centered=True):
        # Give both axes the same half-span so geometry is not distorted;
        # by default the view is centered on the origin.
        xlim = self._ax.get_xlim()
        ylim = self._ax.get_ylim()
        if not zero_centered:
            xmean = np.mean(xlim)
            ymean = np.mean(ylim)
        else:
            xmean = 0
            ymean = 0
        plot_radius = max(
            [
                abs(lim - mean_)
                for lims, mean_ in ((xlim, xmean), (ylim, ymean))
                for lim in lims
            ]
        )
        self._ax.set_xlim([xmean - plot_radius, xmean + plot_radius])
        self._ax.set_ylim([ymean - plot_radius, ymean + plot_radius])

    def _draw(self):
        # Apply accumulated limits, equalize aspect, and flush the canvas.
        self._set_lim()
        self._set_aspect_equal_2d()
        self._fig.canvas.draw()
        self._fig.canvas.flush_events()
        # Tiny pause lets the GUI event loop process the redraw.
        plt.pause(0.00001)
class Matplotlib3DPlotter(BasePlotter):
    """Matplotlib implementation of BasePlotter for 3D primitives.

    Dispatches on each primitive's ``task_type`` ("Draw3DLines",
    "Draw3DDots", "Draw3DTrail") to create/update mplot3d artists,
    caching them by task name, and refits the axis limits after every draw.
    """

    _fig: plt.figure  # plt figure
    _ax: p3.Axes3D  # plt 3d axis
    # stores artist objects for each task (task name as the key)
    _artist_cache: Dict[str, Any]
    # callables for each task primitives
    _create_impl_callables: Dict[str, Callable]
    _update_impl_callables: Dict[str, Callable]

    def __init__(self, task: "BasePlotterTask") -> None:
        self._fig = plt.figure()
        # NOTE(review): direct Axes3D(fig) construction no longer auto-adds
        # the axes to the figure in recent matplotlib versions
        # (fig.add_subplot(projection="3d") is the supported path) — confirm
        # against the pinned matplotlib version.
        self._ax = p3.Axes3D(self._fig)
        self._artist_cache = {}
        # task_type -> artist factory; an unknown type raises KeyError in
        # _create_impl.
        self._create_impl_callables = {
            "Draw3DLines": self._lines_create_impl,
            "Draw3DDots": self._dots_create_impl,
            "Draw3DTrail": self._trail_create_impl,
        }
        # task_type -> artist updater, mirroring the factories above.
        self._update_impl_callables = {
            "Draw3DLines": self._lines_update_impl,
            "Draw3DDots": self._dots_update_impl,
            "Draw3DTrail": self._trail_update_impl,
        }
        self._init_lim()
        # BasePlotter.__init__ expands the task into primitives and draws them.
        super().__init__(task)

    @property
    def ax(self):
        return self._ax

    @property
    def fig(self):
        return self._fig

    def show(self):
        # Blocks until the window is closed (interactive backends).
        plt.show()

    def _min(self, x, y):
        # None-tolerant min: a missing bound is replaced by the other value.
        if x is None:
            return y
        if y is None:
            return x
        return min(x, y)

    def _max(self, x, y):
        # None-tolerant max, mirroring _min.
        if x is None:
            return y
        if y is None:
            return x
        return max(x, y)

    def _init_lim(self):
        # Reset the running bounding box accumulated by _update_lim.
        self._curr_x_min = None
        self._curr_y_min = None
        self._curr_z_min = None
        self._curr_x_max = None
        self._curr_y_max = None
        self._curr_z_max = None

    def _update_lim(self, xs, ys, zs):
        # Grow the pending bounding box to cover the given coordinates.
        self._curr_x_min = self._min(np.min(xs), self._curr_x_min)
        self._curr_y_min = self._min(np.min(ys), self._curr_y_min)
        self._curr_z_min = self._min(np.min(zs), self._curr_z_min)
        self._curr_x_max = self._max(np.max(xs), self._curr_x_max)
        self._curr_y_max = self._max(np.max(ys), self._curr_y_max)
        self._curr_z_max = self._max(np.max(zs), self._curr_z_max)

    def _set_lim(self):
        # Apply the pending bounding box (only if fully defined), then reset
        # it so the next draw starts accumulating from scratch.
        if not (
            self._curr_x_min is None
            or self._curr_x_max is None
            or self._curr_y_min is None
            or self._curr_y_max is None
            or self._curr_z_min is None
            or self._curr_z_max is None
        ):
            self._ax.set_xlim3d(self._curr_x_min, self._curr_x_max)
            self._ax.set_ylim3d(self._curr_y_min, self._curr_y_max)
            self._ax.set_zlim3d(self._curr_z_min, self._curr_z_max)
        self._init_lim()

    @staticmethod
    def _lines_extract_xyz_impl(index, lines_task):
        # Split segment `index` of the lines task into (xs, ys, zs) arrays.
        return lines_task[index, :, 0], lines_task[index, :, 1], lines_task[index, :, 2]

    @staticmethod
    def _trail_extract_xyz_impl(index, trail_task):
        # Consecutive point pair (index, index+1) of the trail as (xs, ys, zs).
        return (
            trail_task[index : index + 2, 0],
            trail_task[index : index + 2, 1],
            trail_task[index : index + 2, 2],
        )

    def _lines_create_impl(self, lines_task):
        # One 3D line artist per segment, cached under the task name.
        color = lines_task.color
        self._artist_cache[lines_task.task_name] = [
            self._ax.plot(
                *Matplotlib3DPlotter._lines_extract_xyz_impl(i, lines_task),
                color=color,
                linewidth=lines_task.line_width,
                alpha=lines_task.alpha
            )[0]
            for i in range(len(lines_task))
        ]

    def _lines_update_impl(self, lines_task):
        lines_artists = self._artist_cache[lines_task.task_name]
        # assumes the task still has the same number of segments as when the
        # artists were created — TODO confirm
        for i in range(len(lines_task)):
            artist = lines_artists[i]
            xs, ys, zs = Matplotlib3DPlotter._lines_extract_xyz_impl(i, lines_task)
            artist.set_data(xs, ys)
            # z is supplied separately — that is how mplot3d Line3D works.
            artist.set_3d_properties(zs)
            if lines_task.influence_lim:
                self._update_lim(xs, ys, zs)

    def _dots_create_impl(self, dots_task):
        # All dots share a single marker-only artist.
        color = dots_task.color
        self._artist_cache[dots_task.task_name] = self._ax.plot(
            dots_task[:, 0],
            dots_task[:, 1],
            dots_task[:, 2],
            c=color,
            linestyle="",
            marker=".",
            markersize=dots_task.marker_size,
            alpha=dots_task.alpha,
        )[0]

    def _dots_update_impl(self, dots_task):
        dots_artist = self._artist_cache[dots_task.task_name]
        dots_artist.set_data(dots_task[:, 0], dots_task[:, 1])
        dots_artist.set_3d_properties(dots_task[:, 2])
        if dots_task.influence_lim:
            self._update_lim(dots_task[:, 0], dots_task[:, 1], dots_task[:, 2])

    def _trail_create_impl(self, trail_task):
        color = trail_task.color
        trail_length = len(trail_task) - 1
        # One artist per consecutive point pair; alpha fades along the trail.
        # NOTE(review): divides by (trail_length - 1), so a 2-point trail
        # raises ZeroDivisionError and the final segment always gets alpha 0
        # — confirm intended.
        self._artist_cache[trail_task.task_name] = [
            self._ax.plot(
                *Matplotlib3DPlotter._trail_extract_xyz_impl(i, trail_task),
                color=trail_task.color,
                linewidth=trail_task.line_width,
                alpha=trail_task.alpha * (1.0 - i / (trail_length - 1))
            )[0]
            for i in range(trail_length)
        ]

    def _trail_update_impl(self, trail_task):
        trails_artists = self._artist_cache[trail_task.task_name]
        for i in range(len(trail_task) - 1):
            artist = trails_artists[i]
            xs, ys, zs = Matplotlib3DPlotter._trail_extract_xyz_impl(i, trail_task)
            artist.set_data(xs, ys)
            artist.set_3d_properties(zs)
            if trail_task.influence_lim:
                self._update_lim(xs, ys, zs)

    def _create_impl(self, task_list):
        # BasePlotter hook: build artists for new primitives, then render.
        for task in task_list:
            self._create_impl_callables[task.task_type](task)
        self._draw()

    def _update_impl(self, task_list):
        # BasePlotter hook: push fresh data into cached artists, then render.
        for task in task_list:
            self._update_impl_callables[task.task_type](task)
        self._draw()

    def _set_aspect_equal_3d(self):
        # Give all three axes the same half-span around their midpoints so
        # the skeleton is not distorted.
        xlim = self._ax.get_xlim3d()
        ylim = self._ax.get_ylim3d()
        zlim = self._ax.get_zlim3d()
        xmean = np.mean(xlim)
        ymean = np.mean(ylim)
        zmean = np.mean(zlim)
        plot_radius = max(
            [
                abs(lim - mean_)
                for lims, mean_ in ((xlim, xmean), (ylim, ymean), (zlim, zmean))
                for lim in lims
            ]
        )
        self._ax.set_xlim3d([xmean - plot_radius, xmean + plot_radius])
        self._ax.set_ylim3d([ymean - plot_radius, ymean + plot_radius])
        self._ax.set_zlim3d([zmean - plot_radius, zmean + plot_radius])

    def _draw(self):
        # Apply accumulated limits, equalize aspect, and flush the canvas.
        self._set_lim()
        self._set_aspect_equal_3d()
        self._fig.canvas.draw()
        self._fig.canvas.flush_events()
        # Tiny pause lets the GUI event loop process the redraw.
        plt.pause(0.00001)
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/simple_plotter_tasks.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This is where all the task primitives are defined
"""
import numpy as np
from .core import BasePlotterTask
class DrawXDLines(BasePlotterTask):
    """Dimension-agnostic line-set primitive.

    Holds an array of segments shaped (N, 2, dim) plus draw styling.
    Concrete subclasses pin ``dim`` to 2 or 3.
    """

    _lines: np.ndarray
    _color: str
    _line_width: int
    _alpha: float
    _influence_lim: bool

    def __init__(
        self,
        task_name: str,
        lines: np.ndarray,
        color: str = "blue",
        line_width: int = 2,
        alpha: float = 1.0,
        influence_lim: bool = True,
    ) -> None:
        # task_type is the concrete class name, e.g. "Draw3DLines".
        super().__init__(task_name=task_name, task_type=type(self).__name__)
        self._color = color
        self._line_width = line_width
        self._alpha = alpha
        self._influence_lim = influence_lim
        self.update(lines)

    def update(self, lines):
        """Replace the stored segments; expects shape (N, 2, dim)."""
        self._lines = np.array(lines)
        shape = self._lines.shape
        assert len(shape) == 3 and shape[-2] == 2 and shape[-1] == self.dim

    def __getitem__(self, index):
        return self._lines[index]

    def __len__(self):
        return self._lines.shape[0]

    def __iter__(self):
        # A line set is itself a primitive.
        yield self

    @property
    def influence_lim(self) -> bool:
        return self._influence_lim

    @property
    def raw_data(self):
        return self._lines

    @property
    def color(self):
        return self._color

    @property
    def line_width(self):
        return self._line_width

    @property
    def alpha(self):
        return self._alpha

    @property
    def dim(self):
        # Concrete subclasses must override this.
        raise NotImplementedError

    @property
    def name(self):
        return f"{self.dim}DLines"
class DrawXDDots(BasePlotterTask):
    """Dimension-agnostic dot-set primitive holding an (N, dim) point array."""

    _dots: np.ndarray
    _color: str
    _marker_size: int
    _alpha: float
    _influence_lim: bool

    def __init__(
        self,
        task_name: str,
        dots: np.ndarray,
        color: str = "blue",
        marker_size: int = 10,
        alpha: float = 1.0,
        influence_lim: bool = True,
    ) -> None:
        # task_type is the concrete class name, e.g. "Draw3DDots".
        super().__init__(task_name=task_name, task_type=type(self).__name__)
        self._color = color
        self._marker_size = marker_size
        self._alpha = alpha
        self._influence_lim = influence_lim
        self.update(dots)

    def update(self, dots):
        """Replace the stored points; expects shape (N, dim)."""
        self._dots = np.array(dots)
        shape = self._dots.shape
        assert len(shape) == 2 and shape[-1] == self.dim

    def __getitem__(self, index):
        return self._dots[index]

    def __len__(self):
        return self._dots.shape[0]

    def __iter__(self):
        # A dot set is itself a primitive.
        yield self

    @property
    def influence_lim(self) -> bool:
        return self._influence_lim

    @property
    def raw_data(self):
        return self._dots

    @property
    def color(self):
        return self._color

    @property
    def marker_size(self):
        return self._marker_size

    @property
    def alpha(self):
        return self._alpha

    @property
    def dim(self):
        # Concrete subclasses must override this.
        raise NotImplementedError

    @property
    def name(self):
        return f"{self.dim}DDots"
class DrawXDTrail(DrawXDDots):
    """Dot task rendered as a fading trail of consecutive segments."""

    @property
    def line_width(self):
        # Trails are drawn as lines; reuse the dot marker size as width.
        return self.marker_size

    @property
    def name(self):
        return f"{self.dim}DTrail"
class Draw2DLines(DrawXDLines):
    """Concrete line task in the plane."""

    @property
    def dim(self) -> int:
        return 2
class Draw3DLines(DrawXDLines):
    """Concrete line task in 3-space."""

    @property
    def dim(self) -> int:
        return 3
class Draw2DDots(DrawXDDots):
    """Concrete dot task in the plane."""

    @property
    def dim(self) -> int:
        return 2
class Draw3DDots(DrawXDDots):
    """Concrete dot task in 3-space."""

    @property
    def dim(self) -> int:
        return 3
class Draw2DTrail(DrawXDTrail):
    """Concrete trail task in the plane."""

    @property
    def dim(self) -> int:
        return 2
class Draw3DTrail(DrawXDTrail):
    """Concrete trail task in 3-space."""

    @property
    def dim(self) -> int:
        return 3
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/skeleton_plotter_tasks.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This is where all skeleton related complex tasks are defined (skeleton state and skeleton
motion)
"""
import numpy as np
from .core import BasePlotterTask
from .simple_plotter_tasks import Draw3DDots, Draw3DLines, Draw3DTrail
class Draw3DSkeletonState(BasePlotterTask):
    """Draw a single skeleton pose as bone lines plus joint dots.

    Parameters
    ----------
    task_name: unique name for this task.
    skeleton_state: a zero-dimensional skeleton state (one pose).
    joints_color: color for the joint dots (default "red").
    lines_color: color for the bone lines (default "blue").
    alpha: transparency applied to both sub-tasks.
    """

    _lines_task: Draw3DLines  # sub-task for drawing lines
    _dots_task: Draw3DDots  # sub-task for drawing dots

    def __init__(
        self,
        task_name: str,
        skeleton_state,
        joints_color: str = "red",
        lines_color: str = "blue",
        alpha=1.0,
    ) -> None:
        super().__init__(task_name=task_name, task_type="3DSkeletonState")
        lines, dots = Draw3DSkeletonState._get_lines_and_dots(skeleton_state)
        # BUGFIX: the color arguments were swapped — bone lines were drawn
        # with joints_color and joint dots with lines_color, contradicting
        # the parameter names and their documented defaults.
        self._lines_task = Draw3DLines(
            self.get_scoped_name("bodies"), lines, lines_color, alpha=alpha
        )
        self._dots_task = Draw3DDots(
            self.get_scoped_name("joints"), dots, joints_color, alpha=alpha
        )

    @property
    def name(self):
        return "3DSkeleton"

    def update(self, skeleton_state) -> None:
        """Recompute lines/dots from *skeleton_state* and push to sub-tasks."""
        self._update(*Draw3DSkeletonState._get_lines_and_dots(skeleton_state))

    @staticmethod
    def _get_lines_and_dots(skeleton_state):
        """Return (bone segments, joint positions) for a single pose."""
        assert (
            len(skeleton_state.tensor.shape) == 1
        ), "the state has to be zero dimensional"
        dots = skeleton_state.global_translation.numpy()
        skeleton_tree = skeleton_state.skeleton_tree
        parent_indices = skeleton_tree.parent_indices.numpy()
        lines = []
        for node_index in range(len(skeleton_tree)):
            parent_index = parent_indices[node_index]
            # One segment per non-root joint (root is marked by parent -1).
            if parent_index != -1:
                lines.append([dots[node_index], dots[parent_index]])
        lines = np.array(lines)
        return lines, dots

    def _update(self, lines, dots) -> None:
        self._lines_task.update(lines)
        self._dots_task.update(dots)

    def __iter__(self):
        # Expose the sub-tasks' primitives for the plotter.
        yield from self._lines_task
        yield from self._dots_task
class Draw3DSkeletonMotion(BasePlotterTask):
    """Composite task drawing one frame of a skeleton motion.

    Combines the skeleton state (bones + joints), per-joint velocity and
    angular-velocity segments, and a fading center-of-mass trail.
    """

    def __init__(
        self,
        task_name: str,
        skeleton_motion,
        frame_index=None,
        joints_color="red",
        lines_color="blue",
        velocity_color="green",
        angular_velocity_color="purple",
        trail_color="black",
        trail_length=10,
        alpha=1.0,
    ) -> None:
        super().__init__(task_name=task_name, task_type="3DSkeletonMotion")
        self._trail_length = trail_length
        self._skeleton_motion = skeleton_motion
        frame = self._skeleton_motion.clone()
        if frame_index is not None:
            # Narrow the motion down to a single frame.
            frame.tensor = self._skeleton_motion.tensor[frame_index, :]
        self._skeleton_state_task = Draw3DSkeletonState(
            self.get_scoped_name("skeleton_state"),
            frame,
            joints_color=joints_color,
            lines_color=lines_color,
            alpha=alpha,
        )
        vel_lines, avel_lines = Draw3DSkeletonMotion._get_vel_and_avel(frame)
        # Seed the trail with trail_length copies of the current root position.
        self._com_pos = frame.root_translation.numpy()[np.newaxis, ...].repeat(
            trail_length, axis=0
        )
        self._vel_task = Draw3DLines(
            self.get_scoped_name("velocity"),
            vel_lines,
            velocity_color,
            influence_lim=False,
            alpha=alpha,
        )
        self._avel_task = Draw3DLines(
            self.get_scoped_name("angular_velocity"),
            avel_lines,
            angular_velocity_color,
            influence_lim=False,
            alpha=alpha,
        )
        self._com_trail_task = Draw3DTrail(
            self.get_scoped_name("com_trail"),
            self._com_pos,
            trail_color,
            marker_size=2,
            influence_lim=True,
            alpha=alpha,
        )

    @property
    def name(self):
        return "3DSkeletonMotion"

    def update(self, frame_index=None, reset_trail=False, skeleton_motion=None) -> None:
        """Advance to a (new) frame, optionally swapping in a new motion."""
        if skeleton_motion is not None:
            self._skeleton_motion = skeleton_motion
        frame = self._skeleton_motion.clone()
        if frame_index is not None:
            frame.tensor = frame.tensor[frame_index, :]
        root = frame.root_translation.numpy()[np.newaxis, ...]
        if reset_trail:
            self._com_pos = root.repeat(self._trail_length, axis=0)
        else:
            # Shift the trail: newest root position first, drop the oldest.
            self._com_pos = np.concatenate((root, self._com_pos[:-1]), axis=0)
        self._skeleton_state_task.update(frame)
        self._com_trail_task.update(self._com_pos)
        self._update(*Draw3DSkeletonMotion._get_vel_and_avel(frame))

    @staticmethod
    def _get_vel_and_avel(skeleton_motion):
        """Build (N, 2, 3) segment arrays visualizing linear/angular velocity."""
        pos = skeleton_motion.global_translation.numpy()
        # Scale factors keep the arrows visually short.
        vel_lines = np.stack(
            (pos, pos + skeleton_motion.global_velocity.numpy() * 0.02), axis=1
        )
        avel_lines = np.stack(
            (pos, pos + skeleton_motion.global_angular_velocity.numpy() * 0.01), axis=1
        )
        return vel_lines, avel_lines

    def _update(self, vel_lines, avel_lines) -> None:
        self._vel_task.update(vel_lines)
        self._avel_task.update(avel_lines)

    def __iter__(self):
        # Expose every sub-task's primitives for the plotter.
        yield from self._skeleton_state_task
        yield from self._vel_task
        yield from self._avel_task
        yield from self._com_trail_task
class Draw3DSkeletonMotions(BasePlotterTask):
    """Group several Draw3DSkeletonMotion tasks and update them in lockstep."""

    def __init__(self, skeleton_motion_tasks, task_name: str = "3DSkeletonMotions") -> None:
        # BUGFIX: the base-class initializer was never invoked, leaving
        # task_name/task_type unset (AttributeError on any use). task_name
        # is exposed as a new defaulted keyword for backward compatibility.
        super().__init__(task_name=task_name, task_type="3DSkeletonMotions")
        self._skeleton_motion_tasks = skeleton_motion_tasks

    @property
    def name(self):
        return "3DSkeletonMotions"

    def update(self, frame_index) -> None:
        """Advance every child motion task to *frame_index*."""
        for motion_task in self._skeleton_motion_tasks:
            motion_task.update(frame_index)

    def __iter__(self):
        # BUGFIX: previously referenced the nonexistent attribute
        # ``_skeleton_state_tasks`` (AttributeError on iteration). Also
        # flatten each child task into its primitives, matching
        # BasePlotterTasks semantics, so plotters can dispatch on them.
        for motion_task in self._skeleton_motion_tasks:
            yield from motion_task
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/tests/__init__.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/tests/test_plotter.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
from typing import cast
import matplotlib.pyplot as plt
import numpy as np
from ..core import BasePlotterTask, BasePlotterTasks
from ..plt_plotter import Matplotlib3DPlotter
from ..simple_plotter_tasks import Draw3DDots, Draw3DLines
# Smoke test: draw two static primitives — a pair of vertical unit segments
# and the four dots at their endpoints — through the 3D plotter.
task = Draw3DLines(task_name="test",
                   lines=np.array([[[0, 0, 0], [0, 0, 1]], [[0, 1, 1], [0, 1, 0]]]), color="blue")
task2 = Draw3DDots(task_name="test2",
                   dots=np.array([[0, 0, 0], [0, 0, 1], [0, 1, 1], [0, 1, 0]]), color="red")
# Aggregate both tasks; the cast only satisfies the plotter's type hint.
task3 = BasePlotterTasks([task, task2])
plotter = Matplotlib3DPlotter(cast(BasePlotterTask, task3))
# Keep the window open until closed by the user.
plt.show()
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/retarget_motion.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from isaacgym.torch_utils import *
import torch
import json
import numpy as np
from poselib.core.rotation3d import *
from poselib.skeleton.skeleton3d import SkeletonTree, SkeletonState, SkeletonMotion
from poselib.visualization.common import plot_skeleton_state, plot_skeleton_motion_interactive
"""
This scripts shows how to retarget a motion clip from the source skeleton to a target skeleton.
Data required for retargeting are stored in a retarget config dictionary as a json file. This file contains:
- source_motion: a SkeletonMotion npy format representation of a motion sequence. The motion clip should use the same skeleton as the source T-Pose skeleton.
- target_motion_path: path to save the retargeted motion to
- source_tpose: a SkeletonState npy format representation of the source skeleton in it's T-Pose state
- target_tpose: a SkeletonState npy format representation of the target skeleton in it's T-Pose state (pose should match source T-Pose)
- joint_mapping: mapping of joint names from source to target
- rotation: root rotation offset from source to target skeleton (for transforming across different orientation axes), represented as a quaternion in XYZW order.
- scale: scale offset from source to target skeleton
"""
# presumably toggles plotting of intermediate skeleton states/motions during
# retargeting — verify against the usages further down this script.
VISUALIZE = False
def project_joints(motion):
    """Collapse the 3-DoF elbow and knee joints of *motion* into 1-DoF hinges.

    For each elbow/knee the hinge angle is recovered from the world positions of
    the three adjacent bodies (angle between the two limb segments via a dot
    product), and the residual twist that the original 3-DoF joint carried is
    folded back into the parent shoulder/hip rotation so the end-effector
    (hand/foot) direction is preserved as closely as possible.

    Args:
        motion: SkeletonMotion whose skeleton tree contains the body names
            referenced below (amp-humanoid naming: upper/lower arms, hands,
            thighs, shins, feet).

    Returns:
        A new SkeletonMotion with hinge-only elbows/knees and identity local
        rotations on both hands.
    """
    # Resolve body indices by name from the skeleton tree.
    right_upper_arm_id = motion.skeleton_tree._node_indices["right_upper_arm"]
    right_lower_arm_id = motion.skeleton_tree._node_indices["right_lower_arm"]
    right_hand_id = motion.skeleton_tree._node_indices["right_hand"]
    left_upper_arm_id = motion.skeleton_tree._node_indices["left_upper_arm"]
    left_lower_arm_id = motion.skeleton_tree._node_indices["left_lower_arm"]
    left_hand_id = motion.skeleton_tree._node_indices["left_hand"]
    right_thigh_id = motion.skeleton_tree._node_indices["right_thigh"]
    right_shin_id = motion.skeleton_tree._node_indices["right_shin"]
    right_foot_id = motion.skeleton_tree._node_indices["right_foot"]
    left_thigh_id = motion.skeleton_tree._node_indices["left_thigh"]
    left_shin_id = motion.skeleton_tree._node_indices["left_shin"]
    left_foot_id = motion.skeleton_tree._node_indices["left_foot"]

    device = motion.global_translation.device

    # right arm
    right_upper_arm_pos = motion.global_translation[..., right_upper_arm_id, :]
    right_lower_arm_pos = motion.global_translation[..., right_lower_arm_id, :]
    right_hand_pos = motion.global_translation[..., right_hand_id, :]
    right_shoulder_rot = motion.local_rotation[..., right_upper_arm_id, :]
    right_elbow_rot = motion.local_rotation[..., right_lower_arm_id, :]

    # Unit vectors from the elbow to the shoulder and to the hand; the angle
    # between them (minus pi) is the elbow bend.
    right_arm_delta0 = right_upper_arm_pos - right_lower_arm_pos
    right_arm_delta1 = right_hand_pos - right_lower_arm_pos
    right_arm_delta0 = right_arm_delta0 / torch.norm(right_arm_delta0, dim=-1, keepdim=True)
    right_arm_delta1 = right_arm_delta1 / torch.norm(right_arm_delta1, dim=-1, keepdim=True)
    right_elbow_dot = torch.sum(-right_arm_delta0 * right_arm_delta1, dim=-1)
    right_elbow_dot = torch.clamp(right_elbow_dot, -1.0, 1.0)
    right_elbow_theta = torch.acos(right_elbow_dot)
    # Hinge rotation about the local y axis; sign fixed negative for the elbow.
    right_elbow_q = quat_from_angle_axis(-torch.abs(right_elbow_theta), torch.tensor(np.array([[0.0, 1.0, 0.0]]),
                                                                                    device=device, dtype=torch.float32))

    # Compare where the hinge-only elbow sends the hand direction vs. the
    # original 3-DoF elbow; the angular difference is the twist to push up
    # into the shoulder.
    right_elbow_local_dir = motion.skeleton_tree.local_translation[right_hand_id]
    right_elbow_local_dir = right_elbow_local_dir / torch.norm(right_elbow_local_dir)
    right_elbow_local_dir_tile = torch.tile(right_elbow_local_dir.unsqueeze(0), [right_elbow_rot.shape[0], 1])
    right_elbow_local_dir0 = quat_rotate(right_elbow_rot, right_elbow_local_dir_tile)
    right_elbow_local_dir1 = quat_rotate(right_elbow_q, right_elbow_local_dir_tile)
    right_arm_dot = torch.sum(right_elbow_local_dir0 * right_elbow_local_dir1, dim=-1)
    right_arm_dot = torch.clamp(right_arm_dot, -1.0, 1.0)
    right_arm_theta = torch.acos(right_arm_dot)
    # Sign of the twist chosen from which side of the hinge plane the original
    # hand direction falls on.
    right_arm_theta = torch.where(right_elbow_local_dir0[..., 1] <= 0, right_arm_theta, -right_arm_theta)
    right_arm_q = quat_from_angle_axis(right_arm_theta, right_elbow_local_dir.unsqueeze(0))
    right_shoulder_rot = quat_mul(right_shoulder_rot, right_arm_q)

    # left arm (mirror of the right-arm computation)
    left_upper_arm_pos = motion.global_translation[..., left_upper_arm_id, :]
    left_lower_arm_pos = motion.global_translation[..., left_lower_arm_id, :]
    left_hand_pos = motion.global_translation[..., left_hand_id, :]
    left_shoulder_rot = motion.local_rotation[..., left_upper_arm_id, :]
    left_elbow_rot = motion.local_rotation[..., left_lower_arm_id, :]

    left_arm_delta0 = left_upper_arm_pos - left_lower_arm_pos
    left_arm_delta1 = left_hand_pos - left_lower_arm_pos
    left_arm_delta0 = left_arm_delta0 / torch.norm(left_arm_delta0, dim=-1, keepdim=True)
    left_arm_delta1 = left_arm_delta1 / torch.norm(left_arm_delta1, dim=-1, keepdim=True)
    left_elbow_dot = torch.sum(-left_arm_delta0 * left_arm_delta1, dim=-1)
    left_elbow_dot = torch.clamp(left_elbow_dot, -1.0, 1.0)
    left_elbow_theta = torch.acos(left_elbow_dot)
    left_elbow_q = quat_from_angle_axis(-torch.abs(left_elbow_theta), torch.tensor(np.array([[0.0, 1.0, 0.0]]),
                                                                                  device=device, dtype=torch.float32))

    left_elbow_local_dir = motion.skeleton_tree.local_translation[left_hand_id]
    left_elbow_local_dir = left_elbow_local_dir / torch.norm(left_elbow_local_dir)
    left_elbow_local_dir_tile = torch.tile(left_elbow_local_dir.unsqueeze(0), [left_elbow_rot.shape[0], 1])
    left_elbow_local_dir0 = quat_rotate(left_elbow_rot, left_elbow_local_dir_tile)
    left_elbow_local_dir1 = quat_rotate(left_elbow_q, left_elbow_local_dir_tile)
    left_arm_dot = torch.sum(left_elbow_local_dir0 * left_elbow_local_dir1, dim=-1)
    left_arm_dot = torch.clamp(left_arm_dot, -1.0, 1.0)
    left_arm_theta = torch.acos(left_arm_dot)
    left_arm_theta = torch.where(left_elbow_local_dir0[..., 1] <= 0, left_arm_theta, -left_arm_theta)
    left_arm_q = quat_from_angle_axis(left_arm_theta, left_elbow_local_dir.unsqueeze(0))
    left_shoulder_rot = quat_mul(left_shoulder_rot, left_arm_q)

    # right leg (same construction as the arms; hinge sign is positive for
    # knees, and the twist-sign test uses >= 0 instead of <= 0)
    right_thigh_pos = motion.global_translation[..., right_thigh_id, :]
    right_shin_pos = motion.global_translation[..., right_shin_id, :]
    right_foot_pos = motion.global_translation[..., right_foot_id, :]
    right_hip_rot = motion.local_rotation[..., right_thigh_id, :]
    right_knee_rot = motion.local_rotation[..., right_shin_id, :]

    right_leg_delta0 = right_thigh_pos - right_shin_pos
    right_leg_delta1 = right_foot_pos - right_shin_pos
    right_leg_delta0 = right_leg_delta0 / torch.norm(right_leg_delta0, dim=-1, keepdim=True)
    right_leg_delta1 = right_leg_delta1 / torch.norm(right_leg_delta1, dim=-1, keepdim=True)
    right_knee_dot = torch.sum(-right_leg_delta0 * right_leg_delta1, dim=-1)
    right_knee_dot = torch.clamp(right_knee_dot, -1.0, 1.0)
    right_knee_theta = torch.acos(right_knee_dot)
    right_knee_q = quat_from_angle_axis(torch.abs(right_knee_theta), torch.tensor(np.array([[0.0, 1.0, 0.0]]),
                                                                                 device=device, dtype=torch.float32))

    right_knee_local_dir = motion.skeleton_tree.local_translation[right_foot_id]
    right_knee_local_dir = right_knee_local_dir / torch.norm(right_knee_local_dir)
    right_knee_local_dir_tile = torch.tile(right_knee_local_dir.unsqueeze(0), [right_knee_rot.shape[0], 1])
    right_knee_local_dir0 = quat_rotate(right_knee_rot, right_knee_local_dir_tile)
    right_knee_local_dir1 = quat_rotate(right_knee_q, right_knee_local_dir_tile)
    right_leg_dot = torch.sum(right_knee_local_dir0 * right_knee_local_dir1, dim=-1)
    right_leg_dot = torch.clamp(right_leg_dot, -1.0, 1.0)
    right_leg_theta = torch.acos(right_leg_dot)
    right_leg_theta = torch.where(right_knee_local_dir0[..., 1] >= 0, right_leg_theta, -right_leg_theta)
    right_leg_q = quat_from_angle_axis(right_leg_theta, right_knee_local_dir.unsqueeze(0))
    right_hip_rot = quat_mul(right_hip_rot, right_leg_q)

    # left leg
    left_thigh_pos = motion.global_translation[..., left_thigh_id, :]
    left_shin_pos = motion.global_translation[..., left_shin_id, :]
    left_foot_pos = motion.global_translation[..., left_foot_id, :]
    left_hip_rot = motion.local_rotation[..., left_thigh_id, :]
    left_knee_rot = motion.local_rotation[..., left_shin_id, :]

    left_leg_delta0 = left_thigh_pos - left_shin_pos
    left_leg_delta1 = left_foot_pos - left_shin_pos
    left_leg_delta0 = left_leg_delta0 / torch.norm(left_leg_delta0, dim=-1, keepdim=True)
    left_leg_delta1 = left_leg_delta1 / torch.norm(left_leg_delta1, dim=-1, keepdim=True)
    left_knee_dot = torch.sum(-left_leg_delta0 * left_leg_delta1, dim=-1)
    left_knee_dot = torch.clamp(left_knee_dot, -1.0, 1.0)
    left_knee_theta = torch.acos(left_knee_dot)
    left_knee_q = quat_from_angle_axis(torch.abs(left_knee_theta), torch.tensor(np.array([[0.0, 1.0, 0.0]]),
                                                                               device=device, dtype=torch.float32))

    left_knee_local_dir = motion.skeleton_tree.local_translation[left_foot_id]
    left_knee_local_dir = left_knee_local_dir / torch.norm(left_knee_local_dir)
    left_knee_local_dir_tile = torch.tile(left_knee_local_dir.unsqueeze(0), [left_knee_rot.shape[0], 1])
    left_knee_local_dir0 = quat_rotate(left_knee_rot, left_knee_local_dir_tile)
    left_knee_local_dir1 = quat_rotate(left_knee_q, left_knee_local_dir_tile)
    left_leg_dot = torch.sum(left_knee_local_dir0 * left_knee_local_dir1, dim=-1)
    left_leg_dot = torch.clamp(left_leg_dot, -1.0, 1.0)
    left_leg_theta = torch.acos(left_leg_dot)
    left_leg_theta = torch.where(left_knee_local_dir0[..., 1] >= 0, left_leg_theta, -left_leg_theta)
    left_leg_q = quat_from_angle_axis(left_leg_theta, left_knee_local_dir.unsqueeze(0))
    left_hip_rot = quat_mul(left_hip_rot, left_leg_q)

    # Write back: twist-absorbed shoulder/hip rotations, pure-hinge elbows and
    # knees, identity hands.
    new_local_rotation = motion.local_rotation.clone()
    new_local_rotation[..., right_upper_arm_id, :] = right_shoulder_rot
    new_local_rotation[..., right_lower_arm_id, :] = right_elbow_q
    new_local_rotation[..., left_upper_arm_id, :] = left_shoulder_rot
    new_local_rotation[..., left_lower_arm_id, :] = left_elbow_q
    new_local_rotation[..., right_thigh_id, :] = right_hip_rot
    new_local_rotation[..., right_shin_id, :] = right_knee_q
    new_local_rotation[..., left_thigh_id, :] = left_hip_rot
    new_local_rotation[..., left_shin_id, :] = left_knee_q
    new_local_rotation[..., left_hand_id, :] = quat_identity([1])
    new_local_rotation[..., right_hand_id, :] = quat_identity([1])

    new_sk_state = SkeletonState.from_rotation_and_root_translation(motion.skeleton_tree, new_local_rotation, motion.root_translation, is_local=True)
    new_motion = SkeletonMotion.from_skeleton_state(new_sk_state, fps=motion.fps)
    return new_motion
def main():
    """Retarget a source motion clip to the target skeleton and save it.

    Reads the retarget config (paths, joint mapping, rotation/scale offsets,
    trim range, root height offset), performs the retarget, trims frames,
    converts elbows/knees to 1-DoF hinges, grounds the feet, and writes the
    result to the configured target path.
    """
    # load retarget config
    retarget_data_path = "data/configs/retarget_cmu_to_amp.json"
    with open(retarget_data_path) as f:
        retarget_data = json.load(f)

    # load and visualize t-pose files
    source_tpose = SkeletonState.from_file(retarget_data["source_tpose"])
    if VISUALIZE:
        plot_skeleton_state(source_tpose)

    target_tpose = SkeletonState.from_file(retarget_data["target_tpose"])
    if VISUALIZE:
        plot_skeleton_state(target_tpose)

    # load and visualize source motion sequence
    source_motion = SkeletonMotion.from_file(retarget_data["source_motion"])
    if VISUALIZE:
        plot_skeleton_motion_interactive(source_motion)

    # parse data from retarget config
    joint_mapping = retarget_data["joint_mapping"]
    rotation_to_target_skeleton = torch.tensor(retarget_data["rotation"])

    # run retargeting (fix: pass the parsed `joint_mapping` local instead of
    # re-reading retarget_data["joint_mapping"], which left the local unused)
    target_motion = source_motion.retarget_to_by_tpose(
        joint_mapping=joint_mapping,
        source_tpose=source_tpose,
        target_tpose=target_tpose,
        rotation_to_target_skeleton=rotation_to_target_skeleton,
        scale_to_target_skeleton=retarget_data["scale"]
    )

    # keep frames between [trim_frame_beg, trim_frame_end - 1]; -1 means
    # "no trim" on that end
    frame_beg = retarget_data["trim_frame_beg"]
    frame_end = retarget_data["trim_frame_end"]
    if frame_beg == -1:
        frame_beg = 0
    if frame_end == -1:
        frame_end = target_motion.local_rotation.shape[0]

    local_rotation = target_motion.local_rotation
    root_translation = target_motion.root_translation
    local_rotation = local_rotation[frame_beg:frame_end, ...]
    root_translation = root_translation[frame_beg:frame_end, ...]

    new_sk_state = SkeletonState.from_rotation_and_root_translation(target_motion.skeleton_tree, local_rotation, root_translation, is_local=True)
    target_motion = SkeletonMotion.from_skeleton_state(new_sk_state, fps=target_motion.fps)

    # need to convert some joints from 3D to 1D (e.g. elbows and knees)
    target_motion = project_joints(target_motion)

    # move the root so that the feet are on the ground
    local_rotation = target_motion.local_rotation
    root_translation = target_motion.root_translation
    tar_global_pos = target_motion.global_translation
    min_h = torch.min(tar_global_pos[..., 2])
    root_translation[:, 2] += -min_h

    # adjust the height of the root to avoid ground penetration
    root_height_offset = retarget_data["root_height_offset"]
    root_translation[:, 2] += root_height_offset

    new_sk_state = SkeletonState.from_rotation_and_root_translation(target_motion.skeleton_tree, local_rotation, root_translation, is_local=True)
    target_motion = SkeletonMotion.from_skeleton_state(new_sk_state, fps=target_motion.fps)

    # save retargeted motion
    target_motion.to_file(retarget_data["target_motion_path"])

    # visualize retargeted motion
    plot_skeleton_motion_interactive(target_motion)

    return


if __name__ == '__main__':
    main()
================================================
FILE: timechamber/tasks/base/__init__.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: timechamber/tasks/base/ma_vec_task.py
================================================
# Copyright (c) 2018-2021, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from typing import Dict, Any, Tuple
import gym
from gym import spaces
from isaacgym import gymtorch, gymapi
from isaacgym.torch_utils import to_torch
from isaacgym.gymutil import get_property_setter_map, get_property_getter_map, get_default_setter_args, \
apply_random_samples, check_buckets, generate_random_samples
import torch
import numpy as np
import operator, random
from copy import deepcopy
import sys
import abc
from .vec_task import Env
class MA_VecTask(Env):
    def __init__(self, config, rl_device, sim_device, graphics_device_id, headless,
                 virtual_screen_capture: bool = False, force_render: bool = False):
        """Initialise the `MA_VecTask`.

        Args:
            config: config dictionary for the environment.
            rl_device: device the RL algorithm's tensors live on, eg. 'cuda:0' or 'cpu'.
            sim_device: the device to simulate physics on. eg. 'cuda:0' or 'cpu'
            graphics_device_id: the device ID to render with.
            headless: Set to False to disable viewer rendering.
            virtual_screen_capture: stored as-is; not used in this base class.
            force_render: if True, `step` calls `render` every control substep.

        Raises:
            ValueError: if `config["physics_engine"]` is neither "physx" nor "flex".
        """
        super().__init__(config, rl_device, sim_device, graphics_device_id, headless)
        self.virtual_screen_capture = virtual_screen_capture
        self.force_render = force_render

        # Build SimParams from the "sim" section of the config.
        self.sim_params = self.__parse_sim_params(self.cfg["physics_engine"], self.cfg["sim"])
        if self.cfg["physics_engine"] == "physx":
            self.physics_engine = gymapi.SIM_PHYSX
        elif self.cfg["physics_engine"] == "flex":
            self.physics_engine = gymapi.SIM_FLEX
        else:
            msg = f"Invalid physics engine backend: {self.cfg['physics_engine']}"
            raise ValueError(msg)

        # optimization flags for pytorch JIT
        torch._C._jit_set_profiling_mode(False)
        torch._C._jit_set_profiling_executor(False)

        self.gym = gymapi.acquire_gym()

        # Domain-randomization bookkeeping (see apply_randomizations).
        self.first_randomization = True
        self.original_props = {}
        self.dr_randomizations = {}
        self.actor_params_generator = None
        self.extern_actor_params = {}
        self.last_step = -1
        self.last_rand_step = -1
        for env_id in range(self.num_envs):
            self.extern_actor_params[env_id] = None

        # create envs, sim and viewer
        self.sim_initialized = False
        # NOTE(review): called with no arguments although the base-class
        # create_sim signature takes four; concrete tasks are expected to
        # override create_sim with a no-arg variant — confirm in subclasses.
        self.create_sim()
        self.gym.prepare_sim(self.sim)
        self.sim_initialized = True

        self.set_viewer()
        self.allocate_buffers()

        self.obs_dict = {}
def set_viewer(self):
"""Create the viewer."""
# todo: read from config
self.enable_viewer_sync = True
self.viewer = None
# if running with a viewer, set up keyboard shortcuts and camera
if self.headless == False:
# subscribe to keyboard shortcuts
self.viewer = self.gym.create_viewer(
self.sim, gymapi.CameraProperties())
self.gym.subscribe_viewer_keyboard_event(
self.viewer, gymapi.KEY_ESCAPE, "QUIT")
self.gym.subscribe_viewer_keyboard_event(
self.viewer, gymapi.KEY_V, "toggle_viewer_sync")
# set the camera position based on up axis
sim_params = self.gym.get_sim_params(self.sim)
if sim_params.up_axis == gymapi.UP_AXIS_Z:
cam_pos = gymapi.Vec3(20.0, 25.0, 3.0)
cam_target = gymapi.Vec3(10.0, 15.0, 0.0)
else:
cam_pos = gymapi.Vec3(20.0, 3.0, 25.0)
cam_target = gymapi.Vec3(10.0, 0.0, 15.0)
self.gym.viewer_camera_look_at(
self.viewer, None, cam_pos, cam_target)
def allocate_buffers(self):
"""Allocate the observation, states, etc. buffers.
These are what is used to set observations and states in the environment classes which
inherit from this one, and are read in `step` and other related functions.
"""
# allocate buffers
self.obs_buf = torch.zeros(
(self.num_envs * self.num_agents, self.num_obs), device=self.device, dtype=torch.float)
self.states_buf = torch.zeros(
(self.num_envs, self.num_states), device=self.device, dtype=torch.float)
self.rew_buf = torch.zeros(
self.num_envs * self.num_agents, device=self.device, dtype=torch.float)
self.reset_buf = torch.ones(
self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
self.timeout_buf = torch.zeros(
self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
self.progress_buf = torch.zeros(
self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
self.randomize_buf = torch.zeros(
self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
self.extras = {}
def set_sim_params_up_axis(self, sim_params: gymapi.SimParams, axis: str) -> int:
"""Set gravity based on up axis and return axis index.
Args:
sim_params: sim params to modify the axis for.
axis: axis to set sim params for.
Returns:
axis index for up axis.
"""
if axis == 'z':
sim_params.up_axis = gymapi.UP_AXIS_Z
sim_params.gravity.x = 0
sim_params.gravity.y = 0
sim_params.gravity.z = -9.81
return 2
return 1
def create_sim(self, compute_device: int, graphics_device: int, physics_engine, sim_params: gymapi.SimParams):
"""Create an Isaac Gym sim object.
Args:
compute_device: ID of compute device to use.
graphics_device: ID of graphics device to use.
physics_engine: physics engine to use (`gymapi.SIM_PHYSX` or `gymapi.SIM_FLEX`)
sim_params: sim params to use.
Returns:
the Isaac Gym sim object.
"""
sim = self.gym.create_sim(compute_device, graphics_device, physics_engine, sim_params)
if sim is None:
print("*** Failed to create sim")
quit()
return sim
def get_state(self):
"""Returns the state buffer of the environment (the priviledged observations for asymmetric training)."""
return torch.clamp(self.states_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)
    @abc.abstractmethod
    def pre_physics_step(self, actions: torch.Tensor):
        """Apply the actions to the environment (eg by setting torques, position targets).

        Must be implemented by concrete tasks; called once per `step` before
        the physics substeps run.

        Args:
            actions: the actions to apply
        """
    @abc.abstractmethod
    def post_physics_step(self):
        """Compute reward and observations, reset any environments that require it.

        Must be implemented by concrete tasks; called once per `step` after all
        physics substeps have completed.
        """
    def step(self, actions: torch.Tensor) -> Tuple[Dict[str, torch.Tensor], torch.Tensor, torch.Tensor, Dict[str, Any]]:
        """Step the physics of the environment.

        Args:
            actions: actions to apply for every agent in every env.

        Note:
            Despite the return annotation, this implementation returns None;
            results are written in place — observations to `obs_buf`, rewards
            and resets to `rew_buf`/`reset_buf` (via `post_physics_step`), and
            timeouts to `extras["time_outs"]` — for the caller to read.
        """
        # randomize actions
        if self.dr_randomizations.get('actions', None):
            actions = self.dr_randomizations['actions']['noise_lambda'](actions)

        # apply actions
        self.pre_physics_step(actions)

        # step physics and render each frame
        for i in range(self.control_freq_inv):
            if self.force_render:
                self.render()
            self.gym.simulate(self.sim)

        # to fix!  (CPU pipeline needs an explicit fetch after simulate;
        # the GPU path fetches inside render instead)
        if self.device == 'cpu':
            self.gym.fetch_results(self.sim, True)

        # fill time out buffer: mark envs that hit the episode-length cap
        self.timeout_buf = torch.where(self.progress_buf >= self.max_episode_length - 1,
                                       torch.ones_like(self.timeout_buf), torch.zeros_like(self.timeout_buf))

        # compute observations, rewards, resets, ...
        self.post_physics_step()

        # randomize observations
        if self.dr_randomizations.get('observations', None):
            self.obs_buf = self.dr_randomizations['observations']['noise_lambda'](self.obs_buf)

        self.extras["time_outs"] = self.timeout_buf.to(self.rl_device)
        return
def zero_actions(self) -> torch.Tensor:
"""Returns a buffer with zero actions.
Returns:
A buffer of zero torch actions
"""
actions = torch.zeros([self.num_envs * self.num_agents, self.num_actions], dtype=torch.float32,
device=self.rl_device)
return actions
def reset(self, env_ids=None) -> torch.Tensor:
"""Reset the environment.
"""
if (env_ids is None):
# zero_actions = self.zero_actions()
# self.step(zero_actions)
env_ids = to_torch(np.arange(self.num_envs), device=self.device, dtype=torch.long)
self.reset_idx(env_ids)
self.compute_observations()
self.pos_before = self.obs_buf[:self.num_envs, :2].clone()
else:
self._reset_envs(env_ids=env_ids)
return
def _reset_envs(self, env_ids):
if (len(env_ids) > 0):
self.reset_idx(env_ids)
self.compute_observations()
self.pos_before = self.obs_buf[:self.num_envs, :2].clone()
return
def reset_done(self):
"""Reset the environment.
Returns:
Observation dictionary, indices of environments being reset
"""
done_env_ids = self.reset_buf.nonzero(as_tuple=False).flatten()
if len(done_env_ids) > 0:
self.reset_idx(done_env_ids)
self.obs_dict["obs"] = torch.clamp(self.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)
# asymmetric actor-critic
if self.num_states > 0:
self.obs_dict["states"] = self.get_state()
return self.obs_dict, done_env_ids
def render(self):
"""Draw the frame to the viewer, and check for keyboard events."""
if self.viewer:
# check for window closed
if self.gym.query_viewer_has_closed(self.viewer):
sys.exit()
# check for keyboard events
for evt in self.gym.query_viewer_action_events(self.viewer):
if evt.action == "QUIT" and evt.value > 0:
sys.exit()
elif evt.action == "toggle_viewer_sync" and evt.value > 0:
self.enable_viewer_sync = not self.enable_viewer_sync
# fetch results
if self.device != 'cpu':
self.gym.fetch_results(self.sim, True)
# step graphics
if self.enable_viewer_sync:
self.gym.step_graphics(self.sim)
self.gym.draw_viewer(self.viewer, self.sim, True)
# Wait for dt to elapse in real time.
# This synchronizes the physics simulation with the rendering rate.
self.gym.sync_frame_time(self.sim)
else:
self.gym.poll_viewer_events(self.viewer)
def __parse_sim_params(self, physics_engine: str, config_sim: Dict[str, Any]) -> gymapi.SimParams:
"""Parse the config dictionary for physics stepping settings.
Args:
physics_engine: which physics engine to use. "physx" or "flex"
config_sim: dict of sim configuration parameters
Returns
IsaacGym SimParams object with updated settings.
"""
sim_params = gymapi.SimParams()
# check correct up-axis
if config_sim["up_axis"] not in ["z", "y"]:
msg = f"Invalid physics up-axis: {config_sim['up_axis']}"
print(msg)
raise ValueError(msg)
# assign general sim parameters
sim_params.dt = config_sim["dt"]
sim_params.num_client_threads = config_sim.get("num_client_threads", 0)
sim_params.use_gpu_pipeline = config_sim["use_gpu_pipeline"]
sim_params.substeps = config_sim.get("substeps", 2)
# assign up-axis
if config_sim["up_axis"] == "z":
sim_params.up_axis = gymapi.UP_AXIS_Z
else:
sim_params.up_axis = gymapi.UP_AXIS_Y
# assign gravity
sim_params.gravity = gymapi.Vec3(*config_sim["gravity"])
# configure physics parameters
if physics_engine == "physx":
# set the parameters
if "physx" in config_sim:
for opt in config_sim["physx"].keys():
if opt == "contact_collection":
setattr(sim_params.physx, opt, gymapi.ContactCollection(config_sim["physx"][opt]))
else:
setattr(sim_params.physx, opt, config_sim["physx"][opt])
else:
# set the parameters
if "flex" in config_sim:
for opt in config_sim["flex"].keys():
setattr(sim_params.flex, opt, config_sim["flex"][opt])
# return the configured params
return sim_params
"""
Domain Randomization methods
"""
    def get_actor_params_info(self, dr_params: Dict[str, Any], env):
        """Generate a flat array of actor params, their names and ranges.

        Walks the "actor_params" section of the domain-randomization config and
        collects, for every randomized attribute of every actor property, its
        current value plus the configured sampling bounds.

        Args:
            dr_params: domain-randomization config; only "actor_params" is read.
            env: env handle used to look up actor/property values.

        Returns:
            (params, names, lows, highs) as parallel lists, or None when the
            config has no "actor_params" section.
        """
        if "actor_params" not in dr_params:
            return None
        params = []
        names = []
        lows = []
        highs = []
        param_getters_map = get_property_getter_map(self.gym)
        for actor, actor_properties in dr_params["actor_params"].items():
            handle = self.gym.find_actor_handle(env, actor)
            for prop_name, prop_attrs in actor_properties.items():
                if prop_name == 'color':
                    continue  # this is set randomly
                props = param_getters_map[prop_name](env, handle)
                # Some getters return a single property object, others a list
                # (one per body/shape); normalize to a list.
                if not isinstance(props, list):
                    props = [props]
                for prop_idx, prop in enumerate(props):
                    for attr, attr_randomization_params in prop_attrs.items():
                        name = prop_name + '_' + str(prop_idx) + '_' + attr
                        lo_hi = attr_randomization_params['range']
                        distr = attr_randomization_params['distribution']
                        # Only uniform distributions have meaningful bounds;
                        # everything else is reported as unbounded.
                        if 'uniform' not in distr:
                            lo_hi = (-1.0 * float('Inf'), float('Inf'))
                        if isinstance(prop, np.ndarray):
                            # presumably a structured array whose field `attr`
                            # holds one value per element — TODO confirm against
                            # the gym property getters
                            for attr_idx in range(prop[attr].shape[0]):
                                params.append(prop[attr][attr_idx])
                                names.append(name + '_' + str(attr_idx))
                                lows.append(lo_hi[0])
                                highs.append(lo_hi[1])
                        else:
                            params.append(getattr(prop, attr))
                            names.append(name)
                            lows.append(lo_hi[0])
                            highs.append(lo_hi[1])
        return params, names, lows, highs
def apply_randomizations(self, dr_params):
"""Apply domain randomizations to the environment.
Note that currently we can only apply randomizations only on resets, due to current PhysX limitations
Args:
dr_params: parameters for domain randomization to use.
"""
# If we don't have a randomization frequency, randomize every step
rand_freq = dr_params.get("frequency", 1)
# First, determine what to randomize:
# - non-environment parameters when > frequency steps have passed since the last non-environment
# - physical environments in the reset buffer, which have exceeded the randomization frequency threshold
# - on the first call, randomize everything
self.last_step = self.gym.get_frame_count(self.sim)
if self.first_randomization:
do_nonenv_randomize = True
env_ids = list(range(self.num_envs))
else:
do_nonenv_randomize = (self.last_step - self.last_rand_step) >= rand_freq
rand_envs = torch.where(self.randomize_buf >= rand_freq, torch.ones_like(self.randomize_buf),
torch.zeros_like(self.randomize_buf))
rand_envs = torch.logical_and(rand_envs, self.reset_buf)
env_ids = torch.nonzero(rand_envs, as_tuple=False).squeeze(-1).tolist()
self.randomize_buf[rand_envs] = 0
if do_nonenv_randomize:
self.last_rand_step = self.last_step
param_setters_map = get_property_setter_map(self.gym)
param_setter_defaults_map = get_default_setter_args(self.gym)
param_getters_map = get_property_getter_map(self.gym)
# On first iteration, check the number of buckets
if self.first_randomization:
check_buckets(self.gym, self.envs, dr_params)
for nonphysical_param in ["observations", "actions"]:
if nonphysical_param in dr_params and do_nonenv_randomize:
dist = dr_params[nonphysical_param]["distribution"]
op_type = dr_params[nonphysical_param]["operation"]
sched_type = dr_params[nonphysical_param]["schedule"] if "schedule" in dr_params[
nonphysical_param] else None
sched_step = dr_params[nonphysical_param]["schedule_steps"] if "schedule" in dr_params[
nonphysical_param] else None
op = operator.add if op_type == 'additive' else operator.mul
if sched_type == 'linear':
sched_scaling = 1.0 / sched_step * \
min(self.last_step, sched_step)
elif sched_type == 'constant':
sched_scaling = 0 if self.last_step < sched_step else 1
else:
sched_scaling = 1
if dist == 'gaussian':
mu, var = dr_params[nonphysical_param]["range"]
mu_corr, var_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])
if op_type == 'additive':
mu *= sched_scaling
var *= sched_scaling
mu_corr *= sched_scaling
var_corr *= sched_scaling
elif op_type == 'scaling':
var = var * sched_scaling # scale up var over time
mu = mu * sched_scaling + 1.0 * \
(1.0 - sched_scaling) # linearly interpolate
var_corr = var_corr * sched_scaling # scale up var over time
mu_corr = mu_corr * sched_scaling + 1.0 * \
(1.0 - sched_scaling) # linearly interpolate
def noise_lambda(tensor, param_name=nonphysical_param):
params = self.dr_randomizations[param_name]
corr = params.get('corr', None)
if corr is None:
corr = torch.randn_like(tensor)
params['corr'] = corr
corr = corr * params['var_corr'] + params['mu_corr']
return op(
tensor, corr + torch.randn_like(tensor) * params['var'] + params['mu'])
self.dr_randomizations[nonphysical_param] = {'mu': mu, 'var': var, 'mu_corr': mu_corr,
'var_corr': var_corr, 'noise_lambda': noise_lambda}
elif dist == 'uniform':
lo, hi = dr_params[nonphysical_param]["range"]
lo_corr, hi_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])
if op_type == 'additive':
lo *= sched_scaling
hi *= sched_scaling
lo_corr *= sched_scaling
hi_corr *= sched_scaling
elif op_type == 'scaling':
lo = lo * sched_scaling + 1.0 * (1.0 - sched_scaling)
hi = hi * sched_scaling + 1.0 * (1.0 - sched_scaling)
lo_corr = lo_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)
hi_corr = hi_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)
def noise_lambda(tensor, param_name=nonphysical_param):
params = self.dr_randomizations[param_name]
corr = params.get('corr', None)
if corr is None:
corr = torch.randn_like(tensor)
params['corr'] = corr
corr = corr * (params['hi_corr'] - params['lo_corr']) + params['lo_corr']
return op(tensor, corr + torch.rand_like(tensor) * (params['hi'] - params['lo']) + params['lo'])
self.dr_randomizations[nonphysical_param] = {'lo': lo, 'hi': hi, 'lo_corr': lo_corr,
'hi_corr': hi_corr, 'noise_lambda': noise_lambda}
if "sim_params" in dr_params and do_nonenv_randomize:
prop_attrs = dr_params["sim_params"]
prop = self.gym.get_sim_params(self.sim)
if self.first_randomization:
self.original_props["sim_params"] = {
attr: getattr(prop, attr) for attr in dir(prop)}
for attr, attr_randomization_params in prop_attrs.items():
apply_random_samples(
prop, self.original_props["sim_params"], attr, attr_randomization_params, self.last_step)
self.gym.set_sim_params(self.sim, prop)
# If self.actor_params_generator is initialized: use it to
# sample actor simulation params. This gives users the
# freedom to generate samples from arbitrary distributions,
# e.g. use full-covariance distributions instead of the DR's
# default of treating each simulation parameter independently.
extern_offsets = {}
if self.actor_params_generator is not None:
for env_id in env_ids:
self.extern_actor_params[env_id] = \
self.actor_params_generator.sample()
extern_offsets[env_id] = 0
for actor, actor_properties in dr_params["actor_params"].items():
for env_id in env_ids:
env = self.envs[env_id]
handle = self.gym.find_actor_handle(env, actor)
extern_sample = self.extern_actor_params[env_id]
for prop_name, prop_attrs in actor_properties.items():
if prop_name == 'color':
num_bodies = self.gym.get_actor_rigid_body_count(
env, handle)
for n in range(num_bodies):
self.gym.set_rigid_body_color(env, handle, n, gymapi.MESH_VISUAL,
gymapi.Vec3(random.uniform(0, 1), random.uniform(0, 1),
random.uniform(0, 1)))
continue
if prop_name == 'scale':
setup_only = prop_attrs.get('setup_only', False)
if (setup_only and not self.sim_initialized) or not setup_only:
attr_randomization_params = prop_attrs
sample = generate_random_samples(attr_randomization_params, 1,
self.last_step, None)
og_scale = 1
if attr_randomization_params['operation'] == 'scaling':
new_scale = og_scale * sample
elif attr_randomization_params['operation'] == 'additive':
new_scale = og_scale + sample
self.gym.set_actor_scale(env, handle, new_scale)
continue
prop = param_getters_map[prop_name](env, handle)
set_random_properties = True
if isinstance(prop, list):
if self.first_randomization:
self.original_props[prop_name] = [
{attr: getattr(p, attr) for attr in dir(p)} for p in prop]
for p, og_p in zip(prop, self.original_props[prop_name]):
for attr, attr_randomization_params in prop_attrs.items():
setup_only = attr_randomization_params.get('setup_only', False)
if (setup_only and not self.sim_initialized) or not setup_only:
smpl = None
if self.actor_params_generator is not None:
smpl, extern_offsets[env_id] = get_attr_val_from_sample(
extern_sample, extern_offsets[env_id], p, attr)
apply_random_samples(
p, og_p, attr, attr_randomization_params,
self.last_step, smpl)
else:
set_random_properties = False
else:
if self.first_randomization:
self.original_props[prop_name] = deepcopy(prop)
for attr, attr_randomization_params in prop_attrs.items():
setup_only = attr_randomization_params.get('setup_only', False)
if (setup_only and not self.sim_initialized) or not setup_only:
smpl = None
if self.actor_params_generator is not None:
smpl, extern_offsets[env_id] = get_attr_val_from_sample(
extern_sample, extern_offsets[env_id], prop, attr)
apply_random_samples(
prop, self.original_props[prop_name], attr,
attr_randomization_params, self.last_step, smpl)
else:
set_random_properties = False
if set_random_properties:
setter = param_setters_map[prop_name]
default_args = param_setter_defaults_map[prop_name]
setter(env, handle, prop, *default_args)
if self.actor_params_generator is not None:
for env_id in env_ids: # check that we used all dims in sample
if extern_offsets[env_id] > 0:
extern_sample = self.extern_actor_params[env_id]
if extern_offsets[env_id] != extern_sample.shape[0]:
print('env_id', env_id,
'extern_offset', extern_offsets[env_id],
'vs extern_sample.shape', extern_sample.shape)
raise Exception("Invalid extern_sample size")
self.first_randomization = False
================================================
FILE: timechamber/tasks/base/vec_task.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from typing import Dict, Any, Tuple
import gym
from gym import spaces
from isaacgym import gymtorch, gymapi
from isaacgym.torch_utils import to_torch
from isaacgym.gymutil import get_property_setter_map, get_property_getter_map, get_default_setter_args, apply_random_samples, check_buckets, generate_random_samples
import torch
import numpy as np
import operator, random
from copy import deepcopy
import sys
import abc
from abc import ABC
EXISTING_SIM = None
SCREEN_CAPTURE_RESOLUTION = (1027, 768)
def _create_sim_once(gym, *args, **kwargs):
global EXISTING_SIM
if EXISTING_SIM is not None:
return EXISTING_SIM
else:
EXISTING_SIM = gym.create_sim(*args, **kwargs)
return EXISTING_SIM
class Env(ABC):
    """Abstract base class: device selection, env counts and gym spaces."""

    def __init__(self, config: Dict[str, Any], rl_device: str, sim_device: str, graphics_device_id: int, headless: bool):
        """Initialise the env.

        Args:
            config: the configuration dictionary.
            rl_device: device for RL-algorithm tensors, e.g. 'cuda:0' or 'cpu'.
            sim_device: the device to simulate physics on. eg. 'cuda:0' or 'cpu'
            graphics_device_id: the device ID to render with.
            headless: True disables viewer rendering.
        """
        parts = sim_device.split(":")
        self.device_type = parts[0]
        self.device_id = int(parts[1]) if len(parts) > 1 else 0

        # The GPU pipeline only works when physics itself runs on the GPU.
        self.device = "cpu"
        if config["sim"]["use_gpu_pipeline"]:
            if self.device_type.lower() in ("cuda", "gpu"):
                self.device = "cuda" + ":" + str(self.device_id)
            else:
                print("GPU Pipeline can only be used with GPU simulation. Forcing CPU Pipeline.")
                config["sim"]["use_gpu_pipeline"] = False

        self.rl_device = rl_device

        # Rendering: if training in a headless mode
        self.headless = headless
        enable_camera_sensors = config.get("enableCameraSensors", False)
        self.graphics_device_id = graphics_device_id
        if enable_camera_sensors == False and self.headless == True:
            # no camera sensors and no viewer -> no graphics device needed
            self.graphics_device_id = -1

        env_cfg = config["env"]
        self.num_environments = env_cfg["numEnvs"]
        self.num_agents = env_cfg.get("numAgents", 1)  # used for multi-agent environments
        self.num_observations = env_cfg["numObservations"]
        self.num_states = env_cfg.get("numStates", 0)
        self.num_actions = env_cfg["numActions"]
        self.control_freq_inv = env_cfg.get("controlFrequencyInv", 1)

        self.obs_space = spaces.Box(np.ones(self.num_obs) * -np.Inf, np.ones(self.num_obs) * np.Inf)
        self.state_space = spaces.Box(np.ones(self.num_states) * -np.Inf, np.ones(self.num_states) * np.Inf)
        self.act_space = spaces.Box(np.ones(self.num_actions) * -1., np.ones(self.num_actions) * 1.)

        self.clip_obs = env_cfg.get("clipObservations", np.Inf)
        self.clip_actions = env_cfg.get("clipActions", np.Inf)

    @abc.abstractmethod
    def allocate_buffers(self):
        """Create torch buffers for observations, rewards, actions dones and any additional data."""

    @abc.abstractmethod
    def step(self, actions: torch.Tensor) -> Tuple[Dict[str, torch.Tensor], torch.Tensor, torch.Tensor, Dict[str, Any]]:
        """Step the physics of the environment.

        Args:
            actions: actions to apply
        Returns:
            Observations, rewards, resets, info
            Observations are dict of observations (currently only one member called 'obs')
        """

    @abc.abstractmethod
    def reset(self) -> Dict[str, torch.Tensor]:
        """Reset the environment.

        Returns:
            Observation dictionary
        """

    @abc.abstractmethod
    def reset_idx(self, env_ids: torch.Tensor):
        """Reset environments having the provided indices.

        Args:
            env_ids: environments to reset
        """

    @property
    def observation_space(self) -> gym.Space:
        """The environment's observation space."""
        return self.obs_space

    @property
    def action_space(self) -> gym.Space:
        """The environment's action space."""
        return self.act_space

    @property
    def num_envs(self) -> int:
        """The number of environments."""
        return self.num_environments

    @property
    def num_acts(self) -> int:
        """The number of actions in the environment."""
        return self.num_actions

    @property
    def num_obs(self) -> int:
        """The number of observations in the environment."""
        return self.num_observations
class VecTask(Env):
    """Vectorized Isaac Gym task: owns the sim, viewer, tensor buffers and
    domain-randomization machinery; concrete tasks subclass this."""

    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 24}
    def __init__(self, config, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture: bool = False, force_render: bool = False):
        """Initialise the `VecTask`.

        Args:
            config: config dictionary for the environment.
            rl_device: device for RL-algorithm tensors, e.g. 'cuda:0' or 'cpu'.
            sim_device: the device to simulate physics on. eg. 'cuda:0' or 'cpu'
            graphics_device_id: the device ID to render with.
            headless: Set to False to disable viewer rendering.
            virtual_screen_capture: Set to True to allow the users get captured screen in RGB array via `env.render(mode='rgb_array')`.
            force_render: Set to True to always force rendering in the steps (if the `control_freq_inv` is greater than 1 we suggest setting this arg to True)
        """
        super().__init__(config, rl_device, sim_device, graphics_device_id, headless)
        self.virtual_screen_capture = virtual_screen_capture
        self.virtual_display = None
        if self.virtual_screen_capture:
            # Imported lazily so pyvirtualdisplay is only required when capturing.
            from pyvirtualdisplay.smartdisplay import SmartDisplay
            self.virtual_display = SmartDisplay(size=SCREEN_CAPTURE_RESOLUTION)
            self.virtual_display.start()
        self.force_render = force_render
        # NOTE(review): self.cfg is not assigned in Env.__init__; presumably the
        # concrete task sets self.cfg before calling this constructor — confirm.
        self.sim_params = self.__parse_sim_params(self.cfg["physics_engine"], self.cfg["sim"])
        if self.cfg["physics_engine"] == "physx":
            self.physics_engine = gymapi.SIM_PHYSX
        elif self.cfg["physics_engine"] == "flex":
            self.physics_engine = gymapi.SIM_FLEX
        else:
            msg = f"Invalid physics engine backend: {self.cfg['physics_engine']}"
            raise ValueError(msg)
        # optimization flags for pytorch JIT
        torch._C._jit_set_profiling_mode(False)
        torch._C._jit_set_profiling_executor(False)
        self.gym = gymapi.acquire_gym()
        # Domain-randomization bookkeeping (see apply_randomizations).
        self.first_randomization = True
        self.original_props = {}
        self.dr_randomizations = {}
        self.actor_params_generator = None
        self.extern_actor_params = {}
        self.last_step = -1
        self.last_rand_step = -1
        for env_id in range(self.num_envs):
            self.extern_actor_params[env_id] = None
        # create envs, sim and viewer; order matters: sim must exist before
        # prepare_sim, and buffers are sized from counts set during create_sim.
        self.sim_initialized = False
        # NOTE(review): called with no args although the base create_sim takes
        # four — subclasses appear to override it with an argument-less version.
        self.create_sim()
        self.gym.prepare_sim(self.sim)
        self.sim_initialized = True
        self.set_viewer()
        self.allocate_buffers()
        self.obs_dict = {}
def set_viewer(self):
"""Create the viewer."""
# todo: read from config
self.enable_viewer_sync = True
self.viewer = None
# if running with a viewer, set up keyboard shortcuts and camera
if self.headless == False:
# subscribe to keyboard shortcuts
self.viewer = self.gym.create_viewer(
self.sim, gymapi.CameraProperties())
self.gym.subscribe_viewer_keyboard_event(
self.viewer, gymapi.KEY_ESCAPE, "QUIT")
self.gym.subscribe_viewer_keyboard_event(
self.viewer, gymapi.KEY_V, "toggle_viewer_sync")
# set the camera position based on up axis
sim_params = self.gym.get_sim_params(self.sim)
if sim_params.up_axis == gymapi.UP_AXIS_Z:
cam_pos = gymapi.Vec3(20.0, 25.0, 3.0)
cam_target = gymapi.Vec3(10.0, 15.0, 0.0)
else:
cam_pos = gymapi.Vec3(20.0, 3.0, 25.0)
cam_target = gymapi.Vec3(10.0, 0.0, 15.0)
self.gym.viewer_camera_look_at(
self.viewer, None, cam_pos, cam_target)
def allocate_buffers(self):
"""Allocate the observation, states, etc. buffers.
These are what is used to set observations and states in the environment classes which
inherit from this one, and are read in `step` and other related functions.
"""
# allocate buffers
self.obs_buf = torch.zeros(
(self.num_envs, self.num_obs), device=self.device, dtype=torch.float)
self.states_buf = torch.zeros(
(self.num_envs, self.num_states), device=self.device, dtype=torch.float)
self.rew_buf = torch.zeros(
self.num_envs, device=self.device, dtype=torch.float)
self.reset_buf = torch.ones(
self.num_envs, device=self.device, dtype=torch.long)
self.timeout_buf = torch.zeros(
self.num_envs, device=self.device, dtype=torch.long)
self.progress_buf = torch.zeros(
self.num_envs, device=self.device, dtype=torch.long)
self.randomize_buf = torch.zeros(
self.num_envs, device=self.device, dtype=torch.long)
self.extras = {}
    def create_sim(self, compute_device: int, graphics_device: int, physics_engine, sim_params: gymapi.SimParams):
        """Create an Isaac Gym sim object.

        Args:
            compute_device: ID of compute device to use.
            graphics_device: ID of graphics device to use.
            physics_engine: physics engine to use (`gymapi.SIM_PHYSX` or `gymapi.SIM_FLEX`)
            sim_params: sim params to use.
        Returns:
            the Isaac Gym sim object.

        NOTE(review): __init__ invokes self.create_sim() with no arguments, so
        subclasses presumably override this with an argument-less version that
        delegates here — confirm against the concrete tasks.
        """
        # Reuses the process-wide sim if one already exists (_create_sim_once).
        sim = _create_sim_once(self.gym, compute_device, graphics_device, physics_engine, sim_params)
        if sim is None:
            print("*** Failed to create sim")
            quit()
        return sim
def get_state(self):
"""Returns the state buffer of the environment (the privileged observations for asymmetric training)."""
return torch.clamp(self.states_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)
    @abc.abstractmethod
    def pre_physics_step(self, actions: torch.Tensor):
        """Apply the actions to the environment (eg by setting torques, position targets).

        Args:
            actions: the actions to apply
        """

    @abc.abstractmethod
    def post_physics_step(self):
        """Compute reward and observations, reset any environments that require it."""
    def step(self, actions: torch.Tensor) -> Tuple[Dict[str, torch.Tensor], torch.Tensor, torch.Tensor, Dict[str, Any]]:
        """Step the physics of the environment.

        Args:
            actions: actions to apply
        Returns:
            Observations, rewards, resets, info
            Observations are dict of observations (currently only one member called 'obs')
        """
        # randomize actions
        if self.dr_randomizations.get('actions', None):
            actions = self.dr_randomizations['actions']['noise_lambda'](actions)

        action_tensor = torch.clamp(actions, -self.clip_actions, self.clip_actions)
        # apply actions
        self.pre_physics_step(action_tensor)

        # step physics and render each frame
        for i in range(self.control_freq_inv):
            if self.force_render:
                self.render()
            self.gym.simulate(self.sim)

        # to fix!
        if self.device == 'cpu':
            self.gym.fetch_results(self.sim, True)

        # compute observations, rewards, resets, ...
        self.post_physics_step()

        # fill time out buffer: set to 1 if we reached the max episode length AND the reset buffer is 1. Timeout == 1 makes sense only if the reset buffer is 1.
        # NOTE(review): self.max_episode_length is not defined in this class;
        # presumably set by the concrete task — confirm.
        self.timeout_buf = (self.progress_buf >= self.max_episode_length - 1) & (self.reset_buf != 0)

        # randomize observations
        if self.dr_randomizations.get('observations', None):
            self.obs_buf = self.dr_randomizations['observations']['noise_lambda'](self.obs_buf)

        self.extras["time_outs"] = self.timeout_buf.to(self.rl_device)

        self.obs_dict["obs"] = torch.clamp(self.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)

        # asymmetric actor-critic
        if self.num_states > 0:
            self.obs_dict["states"] = self.get_state()

        return self.obs_dict, self.rew_buf.to(self.rl_device), self.reset_buf.to(self.rl_device), self.extras
def zero_actions(self) -> torch.Tensor:
"""Returns a buffer with zero actions.
Returns:
A buffer of zero torch actions
"""
actions = torch.zeros([self.num_envs, self.num_actions], dtype=torch.float32, device=self.rl_device)
return actions
    def reset_idx(self, env_idx):
        """Reset environments with indices in env_idx.

        Should be implemented in an environment class inherited from VecTask.
        """
        pass
def reset(self):
"""Is called only once when environment starts to provide the first observations.
Doesn't calculate observations. Actual reset and observation calculation need to be implemented by user.
Returns:
Observation dictionary
"""
self.obs_dict["obs"] = torch.clamp(self.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)
# asymmetric actor-critic
if self.num_states > 0:
self.obs_dict["states"] = self.get_state()
return self.obs_dict
def reset_done(self):
"""Reset the environment.
Returns:
Observation dictionary, indices of environments being reset
"""
done_env_ids = self.reset_buf.nonzero(as_tuple=False).flatten()
if len(done_env_ids) > 0:
self.reset_idx(done_env_ids)
self.obs_dict["obs"] = torch.clamp(self.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)
# asymmetric actor-critic
if self.num_states > 0:
self.obs_dict["states"] = self.get_state()
return self.obs_dict, done_env_ids
    def render(self, mode="rgb_array"):
        """Draw the frame to the viewer, and check for keyboard events.

        Args:
            mode: when "rgb_array" and a virtual display is active, the grabbed
                frame is returned as a numpy array; otherwise returns None.
        """
        if self.viewer:
            # check for window closed
            if self.gym.query_viewer_has_closed(self.viewer):
                sys.exit()

            # check for keyboard events (bound in set_viewer)
            for evt in self.gym.query_viewer_action_events(self.viewer):
                if evt.action == "QUIT" and evt.value > 0:
                    sys.exit()
                elif evt.action == "toggle_viewer_sync" and evt.value > 0:
                    self.enable_viewer_sync = not self.enable_viewer_sync

            # fetch results
            if self.device != 'cpu':
                self.gym.fetch_results(self.sim, True)

            # step graphics
            if self.enable_viewer_sync:
                self.gym.step_graphics(self.sim)
                self.gym.draw_viewer(self.viewer, self.sim, True)

                # Wait for dt to elapse in real time.
                # This synchronizes the physics simulation with the rendering rate.
                self.gym.sync_frame_time(self.sim)
            else:
                # keep processing window events even when sync is disabled
                self.gym.poll_viewer_events(self.viewer)

        if self.virtual_display and mode == "rgb_array":
            img = self.virtual_display.grab()
            return np.array(img)
def __parse_sim_params(self, physics_engine: str, config_sim: Dict[str, Any]) -> gymapi.SimParams:
"""Parse the config dictionary for physics stepping settings.
Args:
physics_engine: which physics engine to use. "physx" or "flex"
config_sim: dict of sim configuration parameters
Returns
IsaacGym SimParams object with updated settings.
"""
sim_params = gymapi.SimParams()
# check correct up-axis
if config_sim["up_axis"] not in ["z", "y"]:
msg = f"Invalid physics up-axis: {config_sim['up_axis']}"
print(msg)
raise ValueError(msg)
# assign general sim parameters
sim_params.dt = config_sim["dt"]
sim_params.num_client_threads = config_sim.get("num_client_threads", 0)
sim_params.use_gpu_pipeline = config_sim["use_gpu_pipeline"]
sim_params.substeps = config_sim.get("substeps", 2)
# assign up-axis
if config_sim["up_axis"] == "z":
sim_params.up_axis = gymapi.UP_AXIS_Z
else:
sim_params.up_axis = gymapi.UP_AXIS_Y
# assign gravity
sim_params.gravity = gymapi.Vec3(*config_sim["gravity"])
# configure physics parameters
if physics_engine == "physx":
# set the parameters
if "physx" in config_sim:
for opt in config_sim["physx"].keys():
if opt == "contact_collection":
setattr(sim_params.physx, opt, gymapi.ContactCollection(config_sim["physx"][opt]))
else:
setattr(sim_params.physx, opt, config_sim["physx"][opt])
else:
# set the parameters
if "flex" in config_sim:
for opt in config_sim["flex"].keys():
setattr(sim_params.flex, opt, config_sim["flex"][opt])
# return the configured params
return sim_params
"""
Domain Randomization methods
"""
def get_actor_params_info(self, dr_params: Dict[str, Any], env):
"""Generate a flat array of actor params, their names and ranges.
Returns:
The array
"""
if "actor_params" not in dr_params:
return None
params = []
names = []
lows = []
highs = []
param_getters_map = get_property_getter_map(self.gym)
for actor, actor_properties in dr_params["actor_params"].items():
handle = self.gym.find_actor_handle(env, actor)
for prop_name, prop_attrs in actor_properties.items():
if prop_name == 'color':
continue # this is set randomly
props = param_getters_map[prop_name](env, handle)
if not isinstance(props, list):
props = [props]
for prop_idx, prop in enumerate(props):
for attr, attr_randomization_params in prop_attrs.items():
name = prop_name+'_' + str(prop_idx) + '_'+attr
lo_hi = attr_randomization_params['range']
distr = attr_randomization_params['distribution']
if 'uniform' not in distr:
lo_hi = (-1.0*float('Inf'), float('Inf'))
if isinstance(prop, np.ndarray):
for attr_idx in range(prop[attr].shape[0]):
params.append(prop[attr][attr_idx])
names.append(name+'_'+str(attr_idx))
lows.append(lo_hi[0])
highs.append(lo_hi[1])
else:
params.append(getattr(prop, attr))
names.append(name)
lows.append(lo_hi[0])
highs.append(lo_hi[1])
return params, names, lows, highs
    def apply_randomizations(self, dr_params):
        """Apply domain randomizations to the environment.

        Note that currently we can only apply randomizations only on resets, due to current PhysX limitations

        Args:
            dr_params: parameters for domain randomization to use.
        """
        # If we don't have a randomization frequency, randomize every step
        rand_freq = dr_params.get("frequency", 1)

        # First, determine what to randomize:
        #   - non-environment parameters when > frequency steps have passed since the last non-environment
        #   - physical environments in the reset buffer, which have exceeded the randomization frequency threshold
        #   - on the first call, randomize everything
        self.last_step = self.gym.get_frame_count(self.sim)
        if self.first_randomization:
            do_nonenv_randomize = True
            env_ids = list(range(self.num_envs))
        else:
            do_nonenv_randomize = (self.last_step - self.last_rand_step) >= rand_freq
            # envs are randomized only when both due (randomize_buf) and resetting
            rand_envs = torch.where(self.randomize_buf >= rand_freq, torch.ones_like(self.randomize_buf), torch.zeros_like(self.randomize_buf))
            rand_envs = torch.logical_and(rand_envs, self.reset_buf)
            env_ids = torch.nonzero(rand_envs, as_tuple=False).squeeze(-1).tolist()
            self.randomize_buf[rand_envs] = 0

        if do_nonenv_randomize:
            self.last_rand_step = self.last_step

        param_setters_map = get_property_setter_map(self.gym)
        param_setter_defaults_map = get_default_setter_args(self.gym)
        param_getters_map = get_property_getter_map(self.gym)

        # On first iteration, check the number of buckets
        if self.first_randomization:
            check_buckets(self.gym, self.envs, dr_params)

        # Observation/action noise: build a noise_lambda closure per parameter
        # and stash it (with its coefficients) in self.dr_randomizations.
        for nonphysical_param in ["observations", "actions"]:
            if nonphysical_param in dr_params and do_nonenv_randomize:
                dist = dr_params[nonphysical_param]["distribution"]
                op_type = dr_params[nonphysical_param]["operation"]
                sched_type = dr_params[nonphysical_param]["schedule"] if "schedule" in dr_params[nonphysical_param] else None
                sched_step = dr_params[nonphysical_param]["schedule_steps"] if "schedule" in dr_params[nonphysical_param] else None
                op = operator.add if op_type == 'additive' else operator.mul

                # sched_scaling ramps the noise in over schedule_steps frames
                if sched_type == 'linear':
                    sched_scaling = 1.0 / sched_step * \
                        min(self.last_step, sched_step)
                elif sched_type == 'constant':
                    sched_scaling = 0 if self.last_step < sched_step else 1
                else:
                    sched_scaling = 1

                if dist == 'gaussian':
                    mu, var = dr_params[nonphysical_param]["range"]
                    mu_corr, var_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])

                    if op_type == 'additive':
                        mu *= sched_scaling
                        var *= sched_scaling
                        mu_corr *= sched_scaling
                        var_corr *= sched_scaling
                    elif op_type == 'scaling':
                        var = var * sched_scaling  # scale up var over time
                        mu = mu * sched_scaling + 1.0 * \
                            (1.0 - sched_scaling)  # linearly interpolate
                        var_corr = var_corr * sched_scaling  # scale up var over time
                        mu_corr = mu_corr * sched_scaling + 1.0 * \
                            (1.0 - sched_scaling)  # linearly interpolate

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        # correlated noise is drawn once and cached in params['corr']
                        params = self.dr_randomizations[param_name]
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * params['var_corr'] + params['mu_corr']
                        return op(
                            tensor, corr + torch.randn_like(tensor) * params['var'] + params['mu'])

                    self.dr_randomizations[nonphysical_param] = {'mu': mu, 'var': var, 'mu_corr': mu_corr,
                                                                 'var_corr': var_corr, 'noise_lambda': noise_lambda}

                elif dist == 'uniform':
                    lo, hi = dr_params[nonphysical_param]["range"]
                    lo_corr, hi_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])

                    if op_type == 'additive':
                        lo *= sched_scaling
                        hi *= sched_scaling
                        lo_corr *= sched_scaling
                        hi_corr *= sched_scaling
                    elif op_type == 'scaling':
                        lo = lo * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi = hi * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        lo_corr = lo_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi_corr = hi_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        # correlated noise is drawn once and cached in params['corr']
                        params = self.dr_randomizations[param_name]
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * (params['hi_corr'] - params['lo_corr']) + params['lo_corr']
                        return op(tensor, corr + torch.rand_like(tensor) * (params['hi'] - params['lo']) + params['lo'])

                    self.dr_randomizations[nonphysical_param] = {'lo': lo, 'hi': hi, 'lo_corr': lo_corr,
                                                                 'hi_corr': hi_corr, 'noise_lambda': noise_lambda}

        # Randomize global sim params in place.
        if "sim_params" in dr_params and do_nonenv_randomize:
            prop_attrs = dr_params["sim_params"]
            prop = self.gym.get_sim_params(self.sim)

            if self.first_randomization:
                self.original_props["sim_params"] = {
                    attr: getattr(prop, attr) for attr in dir(prop)}

            for attr, attr_randomization_params in prop_attrs.items():
                apply_random_samples(
                    prop, self.original_props["sim_params"], attr, attr_randomization_params, self.last_step)

            self.gym.set_sim_params(self.sim, prop)

        # If self.actor_params_generator is initialized: use it to
        # sample actor simulation params. This gives users the
        # freedom to generate samples from arbitrary distributions,
        # e.g. use full-covariance distributions instead of the DR's
        # default of treating each simulation parameter independently.
        extern_offsets = {}
        if self.actor_params_generator is not None:
            for env_id in env_ids:
                self.extern_actor_params[env_id] = \
                    self.actor_params_generator.sample()
                extern_offsets[env_id] = 0

        # Per-actor, per-env randomization of physical properties.
        for actor, actor_properties in dr_params["actor_params"].items():
            for env_id in env_ids:
                env = self.envs[env_id]
                handle = self.gym.find_actor_handle(env, actor)
                extern_sample = self.extern_actor_params[env_id]

                for prop_name, prop_attrs in actor_properties.items():
                    if prop_name == 'color':
                        # visual only: assign a uniformly random RGB per body
                        num_bodies = self.gym.get_actor_rigid_body_count(
                            env, handle)
                        for n in range(num_bodies):
                            self.gym.set_rigid_body_color(env, handle, n, gymapi.MESH_VISUAL,
                                                          gymapi.Vec3(random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1)))
                        continue
                    if prop_name == 'scale':
                        # scale has its own setter; honor setup_only restrictions
                        setup_only = prop_attrs.get('setup_only', False)
                        if (setup_only and not self.sim_initialized) or not setup_only:
                            attr_randomization_params = prop_attrs
                            sample = generate_random_samples(attr_randomization_params, 1,
                                                             self.last_step, None)
                            og_scale = 1
                            if attr_randomization_params['operation'] == 'scaling':
                                new_scale = og_scale * sample
                            elif attr_randomization_params['operation'] == 'additive':
                                new_scale = og_scale + sample
                            self.gym.set_actor_scale(env, handle, new_scale)
                        continue

                    prop = param_getters_map[prop_name](env, handle)
                    set_random_properties = True
                    # NOTE(review): get_attr_val_from_sample (used below) is not
                    # imported in this file; the actor_params_generator path
                    # would raise NameError — confirm where it is defined.
                    if isinstance(prop, list):
                        if self.first_randomization:
                            # snapshot originals so randomization is not compounded
                            self.original_props[prop_name] = [
                                {attr: getattr(p, attr) for attr in dir(p)} for p in prop]
                        for p, og_p in zip(prop, self.original_props[prop_name]):
                            for attr, attr_randomization_params in prop_attrs.items():
                                setup_only = attr_randomization_params.get('setup_only', False)
                                if (setup_only and not self.sim_initialized) or not setup_only:
                                    smpl = None
                                    if self.actor_params_generator is not None:
                                        smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                            extern_sample, extern_offsets[env_id], p, attr)
                                    apply_random_samples(
                                        p, og_p, attr, attr_randomization_params,
                                        self.last_step, smpl)
                                else:
                                    set_random_properties = False
                    else:
                        if self.first_randomization:
                            self.original_props[prop_name] = deepcopy(prop)
                        for attr, attr_randomization_params in prop_attrs.items():
                            setup_only = attr_randomization_params.get('setup_only', False)
                            if (setup_only and not self.sim_initialized) or not setup_only:
                                smpl = None
                                if self.actor_params_generator is not None:
                                    smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                        extern_sample, extern_offsets[env_id], prop, attr)
                                apply_random_samples(
                                    prop, self.original_props[prop_name], attr,
                                    attr_randomization_params, self.last_step, smpl)
                            else:
                                set_random_properties = False

                    if set_random_properties:
                        setter = param_setters_map[prop_name]
                        default_args = param_setter_defaults_map[prop_name]
                        setter(env, handle, prop, *default_args)

        if self.actor_params_generator is not None:
            for env_id in env_ids:  # check that we used all dims in sample
                if extern_offsets[env_id] > 0:
                    extern_sample = self.extern_actor_params[env_id]
                    if extern_offsets[env_id] != extern_sample.shape[0]:
                        print('env_id', env_id,
                              'extern_offset', extern_offsets[env_id],
                              'vs extern_sample.shape', extern_sample.shape)
                        raise Exception("Invalid extern_sample size")

        self.first_randomization = False
================================================
FILE: timechamber/tasks/data/assets/mjcf/amp_humanoid_sword_shield.xml
================================================
================================================
FILE: timechamber/tasks/data/models/llc_reallusion_sword_shield.pth
================================================
[File too large to display: 80.6 MB]
================================================
FILE: timechamber/tasks/data/motions/reallusion_sword_shield/README.txt
================================================
This motion data is provided courtesy of Reallusion,
strictly for noncommercial use. The original motion data
is available at:
https://actorcore.reallusion.com/motion/pack/studio-mocap-sword-and-shield-stunts
https://actorcore.reallusion.com/motion/pack/studio-mocap-sword-and-shield-moves
================================================
FILE: timechamber/tasks/data/motions/reallusion_sword_shield/dataset_reallusion_sword_shield.yaml
================================================
motions:
- file: "RL_Avatar_Atk_2xCombo01_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_2xCombo02_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_2xCombo03_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_2xCombo04_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_2xCombo05_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_3xCombo01_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_3xCombo02_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_3xCombo03_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_3xCombo04_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_3xCombo05_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_3xCombo06_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_3xCombo07_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_4xCombo01_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_4xCombo02_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_4xCombo03_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_SlashDown_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_SlashLeft_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_SlashRight_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_SlashUp_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_Spin_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_Stab_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Counter_Atk01_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Counter_Atk02_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Counter_Atk03_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Counter_Atk04_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Kill_2xCombo01_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Kill_2xCombo02_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Kill_3xCombo01_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Kill_3xCombo02_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Kill_4xCombo01_Motion.npy"
weight: 0.00724638
- file: "RL_Avatar_Atk_Jump_Motion.npy"
weight: 0.03105590
- file: "RL_Avatar_Atk_Kick_Motion.npy"
weight: 0.03105590
- file: "RL_Avatar_Atk_ShieldCharge_Motion.npy"
weight: 0.03105590
- file: "RL_Avatar_Atk_ShieldSwipe01_Motion.npy"
weight: 0.03105590
- file: "RL_Avatar_Atk_ShieldSwipe02_Motion.npy"
weight: 0.03105590
- file: "RL_Avatar_Counter_Atk05_Motion.npy"
weight: 0.03105590
- file: "RL_Avatar_Standoff_Feint_Motion.npy"
weight: 0.03105590
- file: "RL_Avatar_Dodge_Backward_Motion.npy"
weight: 0.01552795
- file: "RL_Avatar_RunBackward_Motion.npy"
weight: 0.01552795
- file: "RL_Avatar_WalkBackward01_Motion.npy"
weight: 0.01552795
- file: "RL_Avatar_WalkBackward02_Motion.npy"
weight: 0.01552795
- file: "RL_Avatar_Dodgle_Left_Motion.npy"
weight: 0.01552795
- file: "RL_Avatar_RunLeft_Motion.npy"
weight: 0.01552795
- file: "RL_Avatar_WalkLeft01_Motion.npy"
weight: 0.01552795
- file: "RL_Avatar_WalkLeft02_Motion.npy"
weight: 0.01552795
- file: "RL_Avatar_Dodgle_Right_Motion.npy"
weight: 0.01552795
- file: "RL_Avatar_RunRight_Motion.npy"
weight: 0.01552795
- file: "RL_Avatar_WalkRight01_Motion.npy"
weight: 0.01552795
- file: "RL_Avatar_WalkRight02_Motion.npy"
weight: 0.01552795
- file: "RL_Avatar_RunForward_Motion.npy"
weight: 0.02070393
- file: "RL_Avatar_WalkForward01_Motion.npy"
weight: 0.02070393
- file: "RL_Avatar_WalkForward02_Motion.npy"
weight: 0.02070393
- file: "RL_Avatar_Standoff_Circle_Motion.npy"
weight: 0.06211180
- file: "RL_Avatar_TurnLeft90_Motion.npy"
weight: 0.03105590
- file: "RL_Avatar_TurnLeft180_Motion.npy"
weight: 0.03105590
- file: "RL_Avatar_TurnRight90_Motion.npy"
weight: 0.03105590
- file: "RL_Avatar_TurnRight180_Motion.npy"
weight: 0.03105590
- file: "RL_Avatar_Fall_Backward_Motion.npy"
weight: 0.00869565
- file: "RL_Avatar_Fall_Left_Motion.npy"
weight: 0.00869565
- file: "RL_Avatar_Fall_Right_Motion.npy"
weight: 0.00869565
- file: "RL_Avatar_Fall_SpinLeft_Motion.npy"
weight: 0.00869565
- file: "RL_Avatar_Fall_SpinRight_Motion.npy"
weight: 0.00869565
- file: "RL_Avatar_Idle_Alert(0)_Motion.npy"
weight: 0.00434783
- file: "RL_Avatar_Idle_Alert_Motion.npy"
weight: 0.00434783
- file: "RL_Avatar_Idle_Battle(0)_Motion.npy"
weight: 0.00434783
- file: "RL_Avatar_Idle_Battle_Motion.npy"
weight: 0.00434783
- file: "RL_Avatar_Idle_Ready(0)_Motion.npy"
weight: 0.00434783
- file: "RL_Avatar_Idle_Ready_Motion.npy"
weight: 0.00434783
- file: "RL_Avatar_Standoff_Swing_Motion.npy"
weight: 0.00434783
- file: "RL_Avatar_Taunt_PoundChest_Motion.npy"
weight: 0.00434783
- file: "RL_Avatar_Taunt_Roar_Motion.npy"
weight: 0.00434783
- file: "RL_Avatar_Taunt_ShieldKnock_Motion.npy"
weight: 0.00434783
- file: "RL_Avatar_Shield_BlockBackward_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Shield_BlockCrouch_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Shield_BlockDown_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Shield_BlockLeft_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Shield_BlockRight_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Shield_BlockUp_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Sword_ParryBackward01_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Sword_ParryBackward02_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Sword_ParryBackward03_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Sword_ParryBackward04_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Sword_ParryCrouch_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Sword_ParryDown_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Sword_ParryLeft_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Sword_ParryRight_Motion.npy"
weight: 0.00289855
- file: "RL_Avatar_Sword_ParryUp_Motion.npy"
weight: 0.00289855
================================================
FILE: timechamber/tasks/ma_ant_battle.py
================================================
from typing import Tuple
import os

import numpy as np
import torch
from isaacgym import gymapi
from isaacgym import gymtorch
from isaacgym.gymtorch import *

from timechamber.utils.torch_jit_utils import *
from .base.ma_vec_task import MA_VecTask
class MA_Ant_Battle(MA_VecTask):
    """Multi-agent ant "battle royale" environment.

    Several identical ant robots start near the rim of a circular arena whose
    radius shrinks every step; an ant is eliminated once its root leaves the
    circle.  Per agent: 32 self-observation features plus 27 features per
    opponent, and 8 DOF position-target actions.  Only one agent is trained
    (see get_number_of_agents); eliminated agents receive zero action targets.
    """

    def __init__(self, cfg, sim_device, rl_device, graphics_device_id, headless, virtual_screen_capture, force_render):
        """Parse config, build sim/envs via the base class, then wrap GPU state tensors."""
        self.extras = None  # replaced by allocate_buffers() during base-class init
        self.cfg = cfg
        self.randomization_params = self.cfg["task"]["randomization_params"]
        self.randomize = self.cfg["task"]["randomize"]
        self.max_episode_length = self.cfg["env"]["episodeLength"]
        self.termination_height = self.cfg["env"]["terminationHeight"]
        self.plane_static_friction = self.cfg["env"]["plane"]["staticFriction"]
        self.plane_dynamic_friction = self.cfg["env"]["plane"]["dynamicFriction"]
        self.plane_restitution = self.cfg["env"]["plane"]["restitution"]
        self.action_scale = self.cfg["env"]["control"]["actionScale"]
        self.joints_at_limit_cost_scale = self.cfg["env"]["jointsAtLimitCost"]
        self.dof_vel_scale = self.cfg["env"]["dofVelocityScale"]
        self.ant_agents_state = []
        # Reward coefficients (hard-coded rather than read from config).
        self.win_reward_scale = 2000
        self.move_to_op_reward_scale = 1.
        self.stay_in_center_reward_scale = 0.2
        self.action_cost_scale = -0.000025
        self.push_scale = 1.
        self.dense_reward_scale = 1.0  # decayed by clear_count()
        self.hp_decay_scale = 1.
        # PD gains applied to every DOF in _create_envs().
        self.Kp = self.cfg["env"]["control"]["stiffness"]
        self.Kd = self.cfg["env"]["control"]["damping"]
        # 32 self features + 27 per opponent (see compute_ant_observations).
        self.cfg["env"]["numObservations"] = 32 + 27 * (self.cfg["env"].get("numAgents", 1) - 1)
        self.cfg["env"]["numActions"] = 8
        self.borderline_space = cfg["env"]["borderlineSpace"]
        # The arena radius shrinks by this amount every step, reaching zero at
        # the episode-length limit.
        self.borderline_space_unit = self.borderline_space / self.max_episode_length
        self.ant_body_colors = [gymapi.Vec3(*rgb_arr) for rgb_arr in self.cfg["env"]["color"]]
        # BUGFIX: forward virtual_screen_capture / force_render to the base
        # class; previously they were accepted but silently dropped (compare
        # the matching super().__init__ call in MA_Ant_Sumo).
        super().__init__(config=self.cfg, sim_device=sim_device, rl_device=rl_device,
                         graphics_device_id=graphics_device_id,
                         headless=headless, virtual_screen_capture=virtual_screen_capture,
                         force_render=force_render)
        self.use_central_value = False
        self.obs_idxs = torch.eye(4, dtype=torch.float32, device=self.device)
        if self.viewer is not None:
            # Draw the initial (full-radius) arena border in every env.
            for i, env in enumerate(self.envs):
                self._add_circle_borderline(env, self.borderline_space)
            cam_pos = gymapi.Vec3(15.0, 0.0, 3.4)
            cam_target = gymapi.Vec3(10.0, 0.0, 0.0)
            self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)
        # get gym GPU state tensors
        actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim)
        dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim)
        sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim)
        sensors_per_env = 4  # one force sensor per foot (see _create_envs)
        self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs,
                                                                          sensors_per_env * 6)
        self.gym.refresh_dof_state_tensor(self.sim)
        self.gym.refresh_actor_root_state_tensor(self.sim)
        self.root_states = gymtorch.wrap_tensor(actor_root_state)
        print(f'root_states:{self.root_states.shape}')
        self.initial_root_states = self.root_states.clone()
        self.initial_root_states[:, 7:13] = 0  # set lin_vel and ang_vel to 0
        # create some wrapper tensors for different slices
        self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)
        print(f'dof:{self.dof_state.shape}')
        dof_state_shaped = self.dof_state.view(self.num_envs, -1, 2)
        # Per-agent (root_state, dof_pos, dof_vel) views into the sim tensors;
        # writing into these views (reset_idx) mutates the underlying buffers.
        for idx in range(self.num_agents):
            ant_root_state = self.root_states[idx::self.num_agents]
            ant_dof_pos = dof_state_shaped[:, idx * self.num_dof:(idx + 1) * self.num_dof, 0]
            ant_dof_vel = dof_state_shaped[:, idx * self.num_dof:(idx + 1) * self.num_dof, 1]
            self.ant_agents_state.append((ant_root_state, ant_dof_pos, ant_dof_vel))
        self.initial_dof_pos = torch.zeros_like(self.ant_agents_state[0][1], device=self.device, dtype=torch.float)
        zero_tensor = torch.tensor([0.0], device=self.device)
        # Use the nearest DOF limit whenever the zero pose lies outside the limits.
        self.initial_dof_pos = torch.where(self.dof_limits_lower > zero_tensor, self.dof_limits_lower,
                                           torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper,
                                                       self.initial_dof_pos))
        self.initial_dof_vel = torch.zeros_like(self.ant_agents_state[0][2], device=self.device, dtype=torch.float)
        self.dt = self.cfg["sim"]["dt"]
        torques = self.gym.acquire_dof_force_tensor(self.sim)
        self.torques = gymtorch.wrap_tensor(torques).view(self.num_envs, self.num_agents * self.num_dof)
        # Per-actor unit axes (num_agents actors per env) for random rotations.
        self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat(
            (self.num_agents * self.num_envs, 1))
        self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat(
            (self.num_agents * self.num_envs, 1))
        self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat(
            (self.num_agents * self.num_envs, 1))

    def allocate_buffers(self):
        """Allocate observation/reward/bookkeeping tensors on the sim device.

        obs_buf is stacked agent-major: rows [a * num_envs:(a + 1) * num_envs]
        belong to agent a.  reward/reset/progress are tracked per environment.
        extras holds the per-env elimination ranks plus flattened win/lose/draw
        flags for agent 0's match against each of the (num_agents - 1) opponents.
        """
        self.obs_buf = torch.zeros((self.num_agents * self.num_envs, self.num_obs), device=self.device,
                                   dtype=torch.float)
        self.rew_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.float)
        # Initialised to ones (unlike the other buffers), so every env carries
        # the reset flag when training starts.
        self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long)
        self.timeout_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.progress_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.randomize_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        self.extras = {'ranks': torch.zeros((self.num_envs, self.num_agents), device=self.device, dtype=torch.long),
                       'win': torch.zeros((self.num_envs * (self.num_agents - 1),), device=self.device,
                                          dtype=torch.bool),
                       'lose': torch.zeros((self.num_envs * (self.num_agents - 1),), device=self.device,
                                           dtype=torch.bool),
                       'draw': torch.zeros((self.num_envs * (self.num_agents - 1),), device=self.device,
                                           dtype=torch.bool)}

    def create_sim(self):
        """Create the sim, precompute the arena border geometry, and build all envs."""
        self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, 'z')
        self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)
        # Unit-radius circle sampled at 1-degree segments, stacked at 20
        # heights; _add_circle_borderline() scales these vertices by the
        # current arena radius.
        lines = []
        borderline_height = 0.01
        for height in range(20):
            for angle in range(360):
                begin_point = [np.cos(np.radians(angle)), np.sin(np.radians(angle)), borderline_height * height]
                end_point = [np.cos(np.radians(angle + 1)), np.sin(np.radians(angle + 1)), borderline_height * height]
                lines.append(begin_point)
                lines.append(end_point)
        self.lines = np.array(lines, dtype=np.float32)
        self._create_ground_plane()
        print(f'num envs {self.num_envs} env spacing {self.cfg["env"]["envSpacing"]}')
        self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs)))
        # If randomizing, apply once immediately on startup before the first sim step
        if self.randomize:
            self.apply_randomizations(self.randomization_params)

    def _add_circle_borderline(self, env, radius):
        """Draw the arena border at the given radius as red viewer line segments."""
        lines = self.lines * radius
        colors = np.array([[1, 0, 0]] * (len(lines) // 2), dtype=np.float32)
        self.gym.add_lines(self.viewer, env, len(lines) // 2, lines, colors)

    def _create_ground_plane(self):
        """Add a flat z-up ground plane with the configured friction parameters."""
        plane_params = gymapi.PlaneParams()
        plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
        plane_params.static_friction = self.plane_static_friction
        plane_params.dynamic_friction = self.plane_dynamic_friction
        # NOTE(review): self.plane_restitution is read from config in __init__
        # but never applied here — confirm whether it should be set.
        self.gym.add_ground(self.sim, plane_params)

    def _create_envs(self, num_envs, spacing, num_per_row):
        """Load the ant asset(s) and instantiate num_agents actors in each env.

        Also records actor indices, DOF limits, foot ("extremity") body
        indices and attaches force sensors to the feet of the first asset.
        """
        lower = gymapi.Vec3(-spacing, -spacing, 0.0)
        upper = gymapi.Vec3(spacing, spacing, spacing)
        asset_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../assets')
        asset_file = "mjcf/nv_ant.xml"
        if "asset" in self.cfg["env"]:
            asset_file = self.cfg["env"]["asset"].get("assetFileName", asset_file)
        asset_path = os.path.join(asset_root, asset_file)
        asset_root = os.path.dirname(asset_path)
        asset_file = os.path.basename(asset_path)
        asset_options = gymapi.AssetOptions()
        # Note - DOF mode is set in the MJCF file and loaded by Isaac Gym
        asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE
        asset_options.angular_damping = 0.0
        # One asset instance per agent (all load the same MJCF file).
        ant_assets = []
        for _ in range(self.num_agents):
            ant_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options)
            ant_assets.append(ant_asset)
        dof_props = self.gym.get_asset_dof_properties(ant_assets[0])
        self.num_dof = self.gym.get_asset_dof_count(ant_assets[0])
        self.num_bodies = self.gym.get_asset_rigid_body_count(ant_assets[0])
        # Drive every joint in PD position mode with the configured gains.
        for i in range(self.num_dof):
            dof_props['driveMode'][i] = gymapi.DOF_MODE_POS
            dof_props['stiffness'][i] = self.Kp
            dof_props['damping'][i] = self.Kd
        # All agents share one spawn pose here; reset_idx() spreads them
        # around the arena before the first step.
        start_pose = gymapi.Transform()
        start_pose.p = gymapi.Vec3(-self.borderline_space + 1, -self.borderline_space + 1, 1.)
        self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w],
                                           device=self.device)
        self.torso_index = 0
        self.num_bodies = self.gym.get_asset_rigid_body_count(ant_assets[0])
        body_names = [self.gym.get_asset_rigid_body_name(ant_assets[0], i) for i in range(self.num_bodies)]
        extremity_names = [s for s in body_names if "foot" in s]
        self.extremities_index = torch.zeros(len(extremity_names), dtype=torch.long, device=self.device)
        print(body_names, extremity_names, self.extremities_index)
        # create force sensors attached to the "feet" (first asset only, so
        # vec_sensor_tensor holds 4 sensors per env)
        extremity_indices = [self.gym.find_asset_rigid_body_index(ant_assets[0], name) for name in extremity_names]
        sensor_pose = gymapi.Transform()
        for body_idx in extremity_indices:
            self.gym.create_asset_force_sensor(ant_assets[0], body_idx, sensor_pose)
        self.ant_handles = []
        self.actor_indices = []
        self.envs = []
        self.dof_limits_lower = []
        self.dof_limits_upper = []
        for i in range(self.num_envs):
            # create env instance
            env_ptr = self.gym.create_env(
                self.sim, lower, upper, num_per_row
            )
            # create actor instance
            for j in range(self.num_agents):
                ant_handle = self.gym.create_actor(env_ptr, ant_assets[j], start_pose, "ant_" + str(j), i, -1, 0)
                actor_index = self.gym.get_actor_index(env_ptr, ant_handle, gymapi.DOMAIN_SIM)
                self.gym.set_actor_dof_properties(env_ptr, ant_handle, dof_props)
                self.actor_indices.append(actor_index)
                self.gym.enable_actor_dof_force_sensors(env_ptr, ant_handle)
                self.ant_handles.append(ant_handle)
                # Tint each agent with its configured color for the viewer.
                for k in range(self.num_bodies):
                    self.gym.set_rigid_body_color(
                        env_ptr, ant_handle, k, gymapi.MESH_VISUAL, self.ant_body_colors[j])
            self.envs.append(env_ptr)
        # DOF limits are read once from the first actor (all agents share the
        # same asset); swap lower/upper if the asset stores them inverted.
        dof_prop = self.gym.get_actor_dof_properties(self.envs[0], self.ant_handles[0])
        for j in range(self.num_dof):
            if dof_prop['lower'][j] > dof_prop['upper'][j]:
                self.dof_limits_lower.append(dof_prop['upper'][j])
                self.dof_limits_upper.append(dof_prop['lower'][j])
            else:
                self.dof_limits_lower.append(dof_prop['lower'][j])
                self.dof_limits_upper.append(dof_prop['upper'][j])
        self.dof_limits_lower = to_torch(self.dof_limits_lower, device=self.device)
        self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device)
        self.actor_indices = to_torch(self.actor_indices, device=self.device).to(dtype=torch.int32)
        for i in range(len(extremity_names)):
            self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0],
                                                                              extremity_names[i])

    def compute_reward(self, actions):
        """Fill the reward/reset buffers and the ranks/win/lose/draw extras.

        Delegates to the jit-scripted compute_ant_reward; the `actions`
        argument is unused (joint torques are read from the sim instead).
        """
        self.rew_buf[:], self.reset_buf[:], self.extras['ranks'][:], self.extras['win'], self.extras['lose'], \
            self.extras['draw'] = compute_ant_reward(
                self.obs_buf,
                self.reset_buf,
                self.progress_buf,
                self.torques,
                self.extras['ranks'],
                self.termination_height,
                self.max_episode_length,
                self.borderline_space,
                self.borderline_space_unit,
                self.win_reward_scale,
                self.stay_in_center_reward_scale,
                self.action_cost_scale,
                self.push_scale,
                self.joints_at_limit_cost_scale,
                self.dense_reward_scale,
                self.dt,
                self.num_agents
            )

    def compute_observations(self):
        """Refresh sim state tensors and rebuild every agent's observation slice."""
        self.gym.refresh_dof_state_tensor(self.sim)
        self.gym.refresh_actor_root_state_tensor(self.sim)
        self.gym.refresh_force_sensor_tensor(self.sim)
        self.gym.refresh_dof_force_tensor(self.sim)
        # obs_buf is agent-major: agent a occupies rows [a*num_envs:(a+1)*num_envs].
        for agent_idx in range(self.num_agents):
            self.obs_buf[agent_idx * self.num_envs:(agent_idx + 1) * self.num_envs, :] = compute_ant_observations(
                self.ant_agents_state,
                self.progress_buf,
                self.dof_limits_lower,
                self.dof_limits_upper,
                self.dof_vel_scale,
                self.termination_height,
                self.borderline_space_unit,
                self.borderline_space,
                self.num_agents,
                agent_idx,
            )

    def reset_idx(self, env_ids):
        """Reset the given envs: re-randomize DOF state and respawn every agent
        near the arena rim at evenly spaced random angles."""
        # Randomization can happen only at reset time, since it can reset actor positions on GPU
        if self.randomize:
            self.apply_randomizations(self.randomization_params)
        positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=self.device)
        velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=self.device)
        # Writing through the per-agent views updates self.dof_state in place.
        for agent_idx in range(self.num_agents):
            root_state, dof_pos, dof_vel = self.ant_agents_state[agent_idx]
            dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower,
                                            self.dof_limits_upper)
            dof_vel[env_ids] = velocities
        agent_env_ids = expand_env_ids(env_ids, self.num_agents)
        env_ids_int32 = self.actor_indices[agent_env_ids]
        rand_angle = torch.rand((len(env_ids),), device=self.device) * torch.pi * 2  # generate angle in 0-360
        # Distance from center in (borderline_space - 1, borderline_space];
        # agents within one env are offset by equal angular increments.
        rand_pos = (self.borderline_space * torch.ones((len(agent_env_ids), 2), device=self.device) -
                    torch.rand((len(agent_env_ids), 2), device=self.device))
        unit_angle = 2 * torch.pi / self.num_agents
        for agent_idx in range(self.num_agents):
            rand_pos[agent_idx::self.num_agents, 0] *= torch.cos(rand_angle + agent_idx * unit_angle)
            rand_pos[agent_idx::self.num_agents, 1] *= torch.sin(rand_angle + agent_idx * unit_angle)
        # Random heading about the z axis for each actor.
        rand_floats = torch_rand_float(-1.0, 1.0, (len(agent_env_ids), 1), device=self.device)
        rand_rotation = quat_from_angle_axis(rand_floats[:, 0] * np.pi, self.z_unit_tensor[agent_env_ids])
        self.root_states[agent_env_ids] = self.initial_root_states[agent_env_ids]
        self.root_states[agent_env_ids, :2] = rand_pos
        self.root_states[agent_env_ids, 3:7] = rand_rotation
        self.gym.set_actor_root_state_tensor_indexed(self.sim,
                                                     gymtorch.unwrap_tensor(self.root_states),
                                                     gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
        self.gym.set_dof_state_tensor_indexed(self.sim,
                                              gymtorch.unwrap_tensor(self.dof_state),
                                              gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
        self.progress_buf[env_ids] = 0
        self.reset_buf[env_ids] = 0
        self.extras['ranks'][env_ids] = 0

    def pre_physics_step(self, actions):
        """Reshape the stacked agent actions and apply DOF position targets.

        Eliminated agents (nonzero rank) are given zero targets so they stop
        moving for the rest of the episode.
        """
        # actions.shape = [num_envs * num_agents, num_actions], stacked as followed:
        # {[(agent1_act_1, agent1_act2)|(agent2_act1, agent2_act2)|...]_(env0),
        #  [(agent1_act_1, agent1_act2)|(agent2_act1, agent2_act2)|...]_(env1),
        #  ... }
        # Concatenate agent-major rows into [num_envs, num_agents * num_actions].
        self.actions = torch.tensor([], device=self.device)
        for agent_idx in range(self.num_agents):
            self.actions = torch.cat((self.actions, actions[agent_idx * self.num_envs:(agent_idx + 1) * self.num_envs]),
                                     dim=-1)
        # Broadcast each agent's rank over its action slots; rank > 0 marks an
        # already-eliminated agent whose targets are forced to zero.
        tmp_actions = self.extras['ranks'].unsqueeze(-1).repeat_interleave(self.num_actions, dim=-1).view(self.num_envs,
                                                                                                          self.num_actions * self.num_agents)
        zero_actions = torch.zeros_like(tmp_actions, dtype=torch.float)
        self.actions = torch.where(tmp_actions > 0, zero_actions, self.actions)
        targets = self.actions
        self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(targets))

    def post_physics_step(self):
        """Advance bookkeeping, reset finished envs, and rebuild obs/rewards."""
        self.progress_buf += 1
        self.randomize_buf += 1
        resets = self.reset_buf.reshape(self.num_envs, 1).sum(dim=1)
        env_ids = (resets == 1).nonzero(as_tuple=False).flatten()
        if len(env_ids) > 0:
            self.reset_idx(env_ids)
        self.compute_observations()
        self.compute_reward(self.actions)
        # Redraw the border per env at its current (shrinking) radius.
        if self.viewer is not None:
            self.gym.clear_lines(self.viewer)
            for i, env in enumerate(self.envs):
                self._add_circle_borderline(env, self.borderline_space - self.borderline_space_unit * self.progress_buf[
                    i].item())

    def get_number_of_agents(self):
        # only train 1 agent
        return 1

    def zero_actions(self) -> torch.Tensor:
        """Returns a buffer with zero actions.

        Also clears the win/lose/draw extras (to plain int 0) so stale match
        results are not reported alongside the zero actions.

        Returns:
            A buffer of zero torch actions
        """
        actions = torch.zeros([self.num_envs * self.num_agents, self.num_actions], dtype=torch.float32,
                              device=self.rl_device)
        self.extras['win'] = self.extras['lose'] = self.extras['draw'] = 0
        return actions

    def clear_count(self):
        """Decay the dense-reward weight and reset the per-env rank bookkeeping."""
        self.dense_reward_scale *= 0.9
        # BUGFIX: match the (num_envs, num_agents) long tensor created in
        # allocate_buffers(); the previous (num_agents, num_agents) float
        # tensor broke reset_idx()'s `extras['ranks'][env_ids] = 0` and
        # compute_reward()'s in-place `[:]` assignment when num_envs != num_agents.
        self.extras['ranks'] = torch.zeros((self.num_envs, self.num_agents), device=self.device, dtype=torch.long)
#####################################################################
###=========================jit functions=========================###
#####################################################################
@torch.jit.script
def expand_env_ids(env_ids, n_agents):
    # type: (Tensor, int) -> Tensor
    """Expand per-env indices into per-agent actor indices.

    Env index e maps to [e * n_agents, ..., e * n_agents + n_agents - 1];
    the result keeps env-major order, i.e. all agents of env_ids[0] first.
    """
    # Broadcast instead of the strided fill: row k of the grid is
    # [env_ids[k] * n_agents + 0, ..., env_ids[k] * n_agents + n_agents - 1].
    offsets = torch.arange(n_agents, device=env_ids.device)
    grid = env_ids.view(-1, 1) * n_agents + offsets
    return grid.flatten().to(torch.long)
@torch.jit.script
def compute_ant_reward(
        obs_buf,
        reset_buf,
        progress_buf,
        torques,
        now_rank,
        termination_height,
        max_episode_length,
        borderline_space,
        borderline_space_unit,
        win_reward_scale,
        stay_in_center_reward_scale,
        action_cost_scale,
        push_scale,
        joints_at_limit_cost_scale,
        dense_reward_scale,
        dt,
        num_agents
):
    # type: (Tensor, Tensor, Tensor,Tensor,Tensor,float,float,float,float,float,float,float,float,float,float,float,int) -> Tuple[Tensor, Tensor,Tensor,Tensor,Tensor,Tensor]
    """Compute reward, resets, elimination ranks and match metrics for agent 0.

    Returns (total_reward, reset, nxt_rank, wins, loses, draws): reward and
    reset are per-env; nxt_rank is (num_envs, num_agents); wins/loses/draws
    are flattened (num_agents - 1, num_envs) flags for agent 0's result
    against each opponent.
    NOTE(review): termination_height, push_scale and dt are currently unused;
    they appear to be kept only for signature compatibility — confirm.
    """
    # (num_agents, num_envs, obs_dim); obs[a, :, 0:2] is agent a's root x/y
    # (first entries of the root state in compute_ant_observations).
    obs = obs_buf.view(num_agents, -1, obs_buf.shape[1])
    # Rank to hand out next, per env: num_agents minus the number of agents
    # already ranked (nonzero entries in now_rank), broadcast over agents.
    nxt_rank_val = num_agents - torch.count_nonzero(now_rank, dim=-1).view(-1, 1).repeat_interleave(num_agents, dim=-1)
    # An agent is "out" once its root xy distance exceeds the current
    # (per-step shrinking) arena radius.
    is_out = torch.sum(torch.square(obs[:, :, 0:2]), dim=-1) >= \
             (borderline_space - progress_buf * borderline_space_unit).square()
    # Newly-eliminated, previously unranked agents receive the next rank.
    nxt_rank = torch.where((torch.transpose(is_out, 0, 1) > 0) & (now_rank == 0), nxt_rank_val, now_rank)
    # reset agents
    tmp_ones = torch.ones_like(reset_buf)
    # Episode ends when the learning agent (index 0) is out, ...
    reset = torch.where(is_out[0, :], tmp_ones, reset_buf)
    # ... the episode times out, ...
    reset = torch.where(progress_buf >= max_episode_length - 1, tmp_ones, reset)
    # ... or every opponent is out (min over booleans == all True).
    reset = torch.where(torch.min(is_out[1:], dim=0).values, tmp_ones, reset)
    tmp_reset = reset.view(-1, 1).repeat_interleave(num_agents, dim=-1)
    # Agents still unranked in a finishing env share the best remaining rank.
    nxt_rank = torch.where((tmp_reset == 1) & (nxt_rank == 0),
                           nxt_rank_val - 1,
                           nxt_rank)
    # compute metric logic: per opponent, agent 0 wins if that opponent is out
    # when the env resets; draws on timeout with the opponent still in; loses
    # otherwise when the env resets.
    tmp_reset = reset.view(1, -1).repeat_interleave(num_agents - 1, dim=0)
    tmp_zeros = torch.zeros_like(is_out[1:], dtype=torch.bool)
    wins = torch.ones_like(is_out[1:], dtype=torch.bool)
    loses = torch.ones_like(is_out[1:], dtype=torch.bool)
    draws = (progress_buf >= max_episode_length - 1).view(1, -1).repeat_interleave(num_agents - 1, dim=0)
    wins = torch.where(is_out[1:], wins & (tmp_reset == 1), tmp_zeros)
    draws = torch.where(is_out[1:] == 0, draws & (tmp_reset == 1), tmp_zeros)
    loses = torch.where(is_out[1:] == 0, loses & (tmp_reset == 1) & (draws == 0), tmp_zeros)
    # Sparse terminal reward, paid only on reset steps: +win_reward_scale for
    # rank 1, decreasing linearly by reward_per_rank for each worse rank.
    sparse_reward = 1.0 * reset
    reward_per_rank = 2 * win_reward_scale / (num_agents - 1)
    sparse_reward = sparse_reward * (win_reward_scale - (nxt_rank[:, 0] - 1) * reward_per_rank)
    # Dense shaping terms, all for agent 0:
    # reward for staying close to the arena center,
    stay_in_center_reward = stay_in_center_reward_scale * torch.exp(-torch.linalg.norm(obs[0, :, :2], dim=-1))
    # penalty for joints near their limits (obs[0, :, 13:21] are the scaled
    # joint positions; ~1 means at the limit),
    dof_at_limit_cost = torch.sum(obs[0, :, 13:21] > 0.99, dim=-1) * joints_at_limit_cost_scale
    # quadratic torque cost (action_cost_scale is negative),
    action_cost_penalty = torch.sum(torch.square(torques), dim=1) * action_cost_scale
    # print("torques:", torques[0, 2])
    # and a term that is largest when total torque is near zero
    # (NOTE(review): added, not subtracted — appears to reward standing still; confirm sign).
    not_move_penalty = torch.exp(-torch.sum(torch.abs(torques), dim=1))
    # print(f'action:...{action_cost_penalty.shape}')
    dense_reward = dof_at_limit_cost + action_cost_penalty + not_move_penalty + stay_in_center_reward
    total_reward = sparse_reward + dense_reward * dense_reward_scale
    return total_reward, reset, nxt_rank, wins.flatten(), loses.flatten(), draws.flatten()
@torch.jit.script
def compute_ant_observations(
        ant_agents_state,
        progress_buf,
        dof_limits_lower,
        dof_limits_upper,
        dof_vel_scale,
        termination_height,
        borderline_space_unit,
        borderline_space,
        num_agents,
        agent_idx,
):
    # type: (List[Tuple[Tensor,Tensor,Tensor]],Tensor,Tensor,Tensor,float,float,float,float,int,int)->Tensor
    """Build agent_idx's observation from every agent's (root, dof_pos, dof_vel).

    Layout per env row:
      self:      13 root state + 8 scaled dof pos + 8 scaled dof vel
                 + 1 distance-to-border + 1 current border radius
                 + 1 fallen flag                                  = 32
      opponent:  7 root pose + 2 relative xy + 8 + 8 + 1 + 1       = 27 each
    i.e. 32 + 27 * (num_agents - 1) in total.
    """
    self_root_state, self_dof_pos, self_dof_vel = ant_agents_state[agent_idx]
    # Joint positions mapped through the DOF limits (unscale presumably
    # normalises into [-1, 1] — defined in torch_jit_utils).
    dof_pos_scaled = unscale(self_dof_pos, dof_limits_lower, dof_limits_upper)
    # Current arena radius after shrinking for progress_buf steps.
    now_border_space = (borderline_space - progress_buf * borderline_space_unit).unsqueeze(-1)
    obs = torch.cat((self_root_state[:, :13], dof_pos_scaled, self_dof_vel * dof_vel_scale,
                     now_border_space - torch.sqrt(torch.sum(self_root_state[:, :2].square(), dim=-1)).unsqueeze(-1),
                     # dis to border
                     now_border_space,
                     # fallen flag: root z below the termination height
                     torch.unsqueeze(self_root_state[:, 2] < termination_height, -1)), dim=-1)
    # Append one 27-feature slice per opponent, in agent-index order.
    for op_idx in range(num_agents):
        if op_idx == agent_idx:
            continue
        op_root_state, op_dof_pos, op_dof_vel = ant_agents_state[op_idx]
        dof_pos_scaled = unscale(op_dof_pos, dof_limits_lower, dof_limits_upper)
        obs = torch.cat((obs, op_root_state[:, :7], self_root_state[:, :2] - op_root_state[:, :2],
                         dof_pos_scaled, op_dof_vel * dof_vel_scale,
                         now_border_space - torch.sqrt(torch.sum(op_root_state[:, :2].square(), dim=-1)).unsqueeze(-1),
                         torch.unsqueeze(op_root_state[:, 2] < termination_height, -1)), dim=-1)
    return obs
@torch.jit.script
def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor):
    """Compose a random rotation about x followed by one about y.

    rand0/rand1 are presumably in [-1, 1] (cf. torch_rand_float in
    reset_idx) and are scaled to +/- pi radians about the given unit axes.
    """
    rot_about_x = quat_from_angle_axis(rand0 * np.pi, x_unit_tensor)
    rot_about_y = quat_from_angle_axis(rand1 * np.pi, y_unit_tensor)
    return quat_mul(rot_about_x, rot_about_y)
================================================
FILE: timechamber/tasks/ma_ant_sumo.py
================================================
from typing import Tuple
import numpy as np
import os
import math
import torch
import random
from isaacgym import gymtorch
from isaacgym import gymapi
from isaacgym.gymtorch import *
# from torch.tensor import Tensor
from timechamber.utils.torch_jit_utils import *
from .base.vec_task import VecTask
from .base.ma_vec_task import MA_VecTask
# todo critic_state full obs
class MA_Ant_Sumo(MA_VecTask):
    def __init__(self, cfg, sim_device, rl_device, graphics_device_id, headless, virtual_screen_capture, force_render):
        """Parse config, build the sim via the base class, then wrap sim state tensors."""
        self.cfg = cfg
        self.randomization_params = self.cfg["task"]["randomization_params"]
        self.randomize = self.cfg["task"]["randomize"]
        self.max_episode_length = self.cfg["env"]["episodeLength"]
        self.termination_height = self.cfg["env"]["terminationHeight"]
        self.borderline_space = cfg["env"]["borderlineSpace"]  # arena radius
        self.plane_static_friction = self.cfg["env"]["plane"]["staticFriction"]
        self.plane_dynamic_friction = self.cfg["env"]["plane"]["dynamicFriction"]
        self.plane_restitution = self.cfg["env"]["plane"]["restitution"]
        self.action_scale = self.cfg["env"]["control"]["actionScale"]
        self.joints_at_limit_cost_scale = self.cfg["env"]["jointsAtLimitCost"]
        self.dof_vel_scale = self.cfg["env"]["dofVelocityScale"]
        # Reward coefficients (hard-coded rather than read from config).
        self.draw_penalty_scale = -1000
        self.win_reward_scale = 2000
        self.move_to_op_reward_scale = 1.
        self.stay_in_center_reward_scale = 0.2
        self.action_cost_scale = -0.000025
        self.push_scale = 1.
        self.dense_reward_scale = 1.
        self.hp_decay_scale = 1.
        # PD gains applied to every DOF in _create_envs().
        self.Kp = self.cfg["env"]["control"]["stiffness"]
        self.Kd = self.cfg["env"]["control"]["damping"]
        # see func: compute_ant_observations() for details
        # self.cfg["env"]["numObservations"] = 48 # dof pos(2) + dof vel(2) + dof action(2) + feet force sensor(force&torque, 6)
        self.cfg["env"]["numObservations"] = 40
        self.cfg["env"]["numActions"] = 8
        self.cfg["env"]["numAgents"] = 2  # sumo is strictly 1-vs-1
        self.use_central_value = False
        # Base-class init builds the sim and envs; self.num_dof and
        # self.dof_limits_* used below are presumably set in _create_envs()
        # during this call — they must exist before the views are created.
        super().__init__(config=self.cfg, sim_device=sim_device, rl_device=rl_device,
                         graphics_device_id=graphics_device_id,
                         headless=headless, virtual_screen_capture=virtual_screen_capture,
                         force_render=force_render)
        if self.viewer is not None:
            for env in self.envs:
                self._add_circle_borderline(env)
            cam_pos = gymapi.Vec3(15.0, 0.0, 3.0)
            cam_target = gymapi.Vec3(10.0, 0.0, 0.0)
            self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)
        # get gym GPU state tensors
        actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim)
        dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim)
        sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim)
        sensors_per_env = 4  # one force sensor per foot
        self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs * self.num_agents,
                                                                          sensors_per_env * 6)
        self.gym.refresh_dof_state_tensor(self.sim)
        self.gym.refresh_actor_root_state_tensor(self.sim)
        self.root_states = gymtorch.wrap_tensor(actor_root_state)
        print(f'root_states:{self.root_states.shape}')
        self.initial_root_states = self.root_states.clone()
        self.initial_root_states[:, 7:13] = 0  # set lin_vel and ang_vel to 0
        # create some wrapper tensors for different slices
        self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)
        print(f"dof state shape: {self.dof_state.shape}")
        # Views into the DOF state: the first num_dof entries per env belong
        # to the ego ant, the next num_dof to the opponent ("_op").
        self.dof_pos = self.dof_state.view(self.num_envs, -1, 2)[:, :self.num_dof, 0]
        self.dof_pos_op = self.dof_state.view(self.num_envs, -1, 2)[:, self.num_dof:2 * self.num_dof, 0]
        self.dof_vel = self.dof_state.view(self.num_envs, -1, 2)[:, :self.num_dof, 1]
        self.dof_vel_op = self.dof_state.view(self.num_envs, -1, 2)[:, self.num_dof:2 * self.num_dof, 1]
        self.initial_dof_pos = torch.zeros_like(self.dof_pos, device=self.device, dtype=torch.float)
        zero_tensor = torch.tensor([0.0], device=self.device)
        # Use the nearest DOF limit whenever the zero pose lies outside the limits.
        self.initial_dof_pos = torch.where(self.dof_limits_lower > zero_tensor, self.dof_limits_lower,
                                           torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper,
                                                       self.initial_dof_pos))
        self.initial_dof_vel = torch.zeros_like(self.dof_vel, device=self.device, dtype=torch.float)
        self.dt = self.cfg["sim"]["dt"]
        torques = self.gym.acquire_dof_force_tensor(self.sim)
        self.torques = gymtorch.wrap_tensor(torques).view(self.num_envs, 2 * self.num_dof)
        # Per-actor unit axes (2 actors per env) used for random rotations.
        self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1))
        self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1))
        self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1))
        # Hit points for the ego and opponent ants; both start at 100.
        self.hp = torch.ones((self.num_envs,), device=self.device, dtype=torch.float32) * 100
        self.hp_op = torch.ones((self.num_envs,), device=self.device, dtype=torch.float32) * 100
def allocate_buffers(self):
self.obs_buf = torch.zeros((self.num_agents * self.num_envs, self.num_obs), device=self.device,
dtype=torch.float)
self.rew_buf = torch.zeros(
self.num_envs, device=self.device, dtype=torch.float)
self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long)
self.timeout_buf = torch.zeros(
self.num_envs, device=self.device, dtype=torch.long)
self.progress_buf = torch.zeros(
self.num_envs, device=self.device, dtype=torch.long)
self.randomize_buf = torch.zeros(
self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
self.extras = {
'win': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool),
'lose': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool),
'draw': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool)}
    def create_sim(self):
        """Create the sim, ground plane and all envs; optionally randomize once at startup."""
        self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, 'z')
        self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)
        self._create_ground_plane()
        print(f'num envs {self.num_envs} env spacing {self.cfg["env"]["envSpacing"]}')
        # Envs are laid out on a square grid.
        self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs)))
        # If randomizing, apply once immediately on startup before the first sim step
        if self.randomize:
            self.apply_randomizations(self.randomization_params)
def _add_circle_borderline(self, env):
lines = []
borderline_height = 0.01
for height in range(20):
for angle in range(360):
begin_point = [np.cos(np.radians(angle)), np.sin(np.radians(angle)), borderline_height * height]
end_point = [np.cos(np.radians(angle + 1)), np.sin(np.radians(angle + 1)), borderline_height * height]
lines.append(begin_point)
lines.append(end_point)
lines = np.array(lines, dtype=np.float32) * self.borderline_space
colors = np.array([[1, 0, 0]] * int(len(lines) / 2), dtype=np.float32)
self.gym.add_lines(self.viewer, env, int(len(lines) / 2), lines, colors)
def _create_ground_plane(self):
    """Add a flat ground plane (z-up) with the configured friction coefficients."""
    params = gymapi.PlaneParams()
    params.normal = gymapi.Vec3(0.0, 0.0, 1.0)  # z-up world
    params.static_friction = self.plane_static_friction
    params.dynamic_friction = self.plane_dynamic_friction
    self.gym.add_ground(self.sim, params)
def _create_envs(self, num_envs, spacing, num_per_row):
    """Load the ant asset twice and spawn two opposing ants in every env.

    Args:
        num_envs: number of parallel environments to create.
        spacing: half-extent of each env cell.
        num_per_row: number of envs per row in the grid layout.
    """
    lower = gymapi.Vec3(-spacing, -spacing, 0.0)
    upper = gymapi.Vec3(spacing, spacing, spacing)
    asset_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../assets')
    asset_file = "mjcf/nv_ant.xml"
    if "asset" in self.cfg["env"]:
        asset_file = self.cfg["env"]["asset"].get("assetFileName", asset_file)
    asset_path = os.path.join(asset_root, asset_file)
    asset_root = os.path.dirname(asset_path)
    asset_file = os.path.basename(asset_path)
    asset_options = gymapi.AssetOptions()
    # Note - DOF mode is set in the MJCF file and loaded by Isaac Gym
    asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE
    asset_options.angular_damping = 0.0
    # Load the same asset twice, one per agent, so force sensors can be
    # attached to each copy independently.
    ant_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options)
    ant_asset_op = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options)
    dof_props = self.gym.get_asset_dof_properties(ant_asset)
    self.num_dof = self.gym.get_asset_dof_count(ant_asset)
    self.num_bodies = self.gym.get_asset_rigid_body_count(ant_asset)  # 9 = 4 x 2(front&back-end legs) + 1(torso)
    # Every joint is PD position-controlled with the configured gains.
    for i in range(self.num_dof):
        dof_props['driveMode'][i] = gymapi.DOF_MODE_POS
        dof_props['stiffness'][i] = self.Kp
        dof_props['damping'][i] = self.Kd
    box_pose = gymapi.Transform()
    box_pose.p = gymapi.Vec3(0, 0, 0)
    # Spawn the two ants in opposite corners, just inside the arena border.
    start_pose = gymapi.Transform()
    start_pose.p = gymapi.Vec3(-self.borderline_space + 1, -self.borderline_space + 1, 1.)
    start_pose_op = gymapi.Transform()
    start_pose_op.p = gymapi.Vec3(self.borderline_space - 1, self.borderline_space - 1, 1.)
    print(start_pose.p, start_pose_op.p)
    self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w],
                                       device=self.device)
    self.torso_index = 0
    self.num_bodies = self.gym.get_asset_rigid_body_count(ant_asset)
    body_names = [self.gym.get_asset_rigid_body_name(ant_asset, i) for i in range(self.num_bodies)]
    extremity_names = [s for s in body_names if "foot" in s]
    self.extremities_index = torch.zeros(len(extremity_names), dtype=torch.long, device=self.device)
    # create force sensors attached to the "feet"
    extremity_indices = [self.gym.find_asset_rigid_body_index(ant_asset, name) for name in extremity_names]
    sensor_pose = gymapi.Transform()
    sensor_pose_op = gymapi.Transform()
    for body_idx in extremity_indices:
        self.gym.create_asset_force_sensor(ant_asset, body_idx, sensor_pose)
        self.gym.create_asset_force_sensor(ant_asset_op, body_idx, sensor_pose_op)
    self.ant_handles = []
    self.actor_indices = []
    self.actor_indices_op = []
    self.actor_handles_op = []
    self.envs = []
    self.pos_before = torch.zeros(2, device=self.device)
    self.dof_limits_lower = []
    self.dof_limits_upper = []
    for i in range(self.num_envs):
        # create env instance
        env_ptr = self.gym.create_env(
            self.sim, lower, upper, num_per_row
        )
        # Collision group i with filter -1: the two ants of an env collide
        # with each other but not with ants from other envs.
        ant_handle = self.gym.create_actor(env_ptr, ant_asset, start_pose, "ant", i, -1, 0)
        actor_index = self.gym.get_actor_index(env_ptr, ant_handle, gymapi.DOMAIN_SIM)
        self.gym.set_actor_dof_properties(env_ptr, ant_handle, dof_props)
        self.actor_indices.append(actor_index)
        self.gym.enable_actor_dof_force_sensors(env_ptr, ant_handle)
        ant_handle_op = self.gym.create_actor(env_ptr, ant_asset_op, start_pose_op, "ant_op", i, -1, 0)
        actor_index_op = self.gym.get_actor_index(env_ptr, ant_handle_op, gymapi.DOMAIN_SIM)
        self.gym.set_actor_dof_properties(env_ptr, ant_handle_op, dof_props)
        self.actor_indices_op.append(actor_index_op)
        # Tint the two ants differently so they are visually distinguishable.
        for j in range(self.num_bodies):
            self.gym.set_rigid_body_color(
                env_ptr, ant_handle, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.97, 0.38, 0.06))
            self.gym.set_rigid_body_color(
                env_ptr, ant_handle_op, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.24, 0.38, 0.06))
        self.envs.append(env_ptr)
        self.ant_handles.append(ant_handle)
        self.actor_handles_op.append(ant_handle_op)
    # Normalise dof limits so that lower <= upper for every joint (uses the
    # last env's actor; limits are identical across envs).
    dof_prop = self.gym.get_actor_dof_properties(env_ptr, ant_handle)
    for j in range(self.num_dof):
        if dof_prop['lower'][j] > dof_prop['upper'][j]:
            self.dof_limits_lower.append(dof_prop['upper'][j])
            self.dof_limits_upper.append(dof_prop['lower'][j])
        else:
            self.dof_limits_lower.append(dof_prop['lower'][j])
            self.dof_limits_upper.append(dof_prop['upper'][j])
    self.dof_limits_lower = to_torch(self.dof_limits_lower, device=self.device)
    self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device)
    self.actor_indices = to_torch(self.actor_indices, dtype=torch.long, device=self.device)
    self.actor_indices_op = to_torch(self.actor_indices_op, dtype=torch.long, device=self.device)
    for i in range(len(extremity_names)):
        self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0],
                                                                          extremity_names[i])
def compute_reward(self, actions):
    """Compute reward/reset for agent 0 and update both agents' HP in place.

    All the work is delegated to the jit-scripted `compute_ant_reward`; its
    outputs are written back into the env buffers.  The first `num_envs`
    rows of `obs_buf` belong to agent 0, the remaining rows to the opponent.
    """
    self.rew_buf[:], self.reset_buf[:], self.hp[:], self.hp_op[:], \
        self.extras['win'], self.extras['lose'], self.extras['draw'] = compute_ant_reward(
            self.obs_buf[:self.num_envs],
            self.obs_buf[self.num_envs:],
            self.reset_buf,
            self.progress_buf,
            self.pos_before,
            self.torques[:, :self.num_dof],
            self.hp,
            self.hp_op,
            self.termination_height,
            self.max_episode_length,
            self.borderline_space,
            self.draw_penalty_scale,
            self.win_reward_scale,
            self.move_to_op_reward_scale,
            self.stay_in_center_reward_scale,
            self.action_cost_scale,
            self.push_scale,
            self.joints_at_limit_cost_scale,
            self.dense_reward_scale,
            self.hp_decay_scale,
            self.dt,
        )
def compute_observations(self):
    """Refresh sim state tensors and rebuild observations for both agents.

    Root states are interleaved per env (even rows: agent 0, odd rows: the
    opponent), so the [0::2] / [1::2] slices select the two agents.  Each
    agent observes its own state plus the opponent's (roles swapped for the
    second half of `obs_buf`).
    """
    self.gym.refresh_dof_state_tensor(self.sim)
    self.gym.refresh_actor_root_state_tensor(self.sim)
    self.gym.refresh_force_sensor_tensor(self.sim)
    self.gym.refresh_dof_force_tensor(self.sim)
    self.obs_buf[:self.num_envs] = \
        compute_ant_observations(
            self.root_states[0::2],
            self.root_states[1::2],
            self.dof_pos,
            self.dof_vel,
            self.dof_limits_lower,
            self.dof_limits_upper,
            self.dof_vel_scale,
            self.termination_height
        )
    # Opponent's observation: same function with the roles swapped.
    self.obs_buf[self.num_envs:] = compute_ant_observations(
        self.root_states[1::2],
        self.root_states[0::2],
        self.dof_pos_op,
        self.dof_vel_op,
        self.dof_limits_lower,
        self.dof_limits_upper,
        self.dof_vel_scale,
        self.termination_height
    )
def reset_idx(self, env_ids):
    """Re-initialise the given envs: noisy dof state, randomized spawn poses.

    Both ants of an env share the same dof noise.  Spawn positions are
    mirrored through the arena centre (angle vs. angle + pi) so the agents
    start on opposite sides; each also gets an independent random yaw.
    """
    # Randomization can happen only at reset time, since it can reset actor positions on GPU
    if self.randomize:
        self.apply_randomizations(self.randomization_params)
    positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=self.device)
    velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=self.device)
    self.dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower,
                                         self.dof_limits_upper)
    self.dof_vel[env_ids] = velocities
    self.dof_pos_op[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower,
                                            self.dof_limits_upper)
    self.dof_vel_op[env_ids] = velocities
    # Sim-domain actor indices covering both ants of every env being reset.
    env_ids_int32 = (torch.cat((self.actor_indices[env_ids], self.actor_indices_op[env_ids]))).to(dtype=torch.int32)
    agent_env_ids = expand_env_ids(env_ids, 2)
    # Each coordinate magnitude is drawn from (borderline_space - 2,
    # borderline_space], then modulated by a random angle; even rows (agent 0)
    # use `rand_angle`, odd rows (opponent) the mirrored angle + pi.
    rand_angle = torch.rand((len(env_ids),), device=self.device) * torch.pi * 2
    rand_pos = torch.ones((len(agent_env_ids), 2), device=self.device) * (
            self.borderline_space * torch.ones((len(agent_env_ids), 2), device=self.device) - torch.rand(
        (len(agent_env_ids), 2), device=self.device) * 2)
    rand_pos[0::2, 0] *= torch.cos(rand_angle)
    rand_pos[0::2, 1] *= torch.sin(rand_angle)
    rand_pos[1::2, 0] *= torch.cos(rand_angle + torch.pi)
    rand_pos[1::2, 1] *= torch.sin(rand_angle + torch.pi)
    # Independent random yaw about z for each spawned ant.
    rand_floats = torch_rand_float(-1.0, 1.0, (len(agent_env_ids), 3), device=self.device)
    rand_rotation = quat_from_angle_axis(rand_floats[:, 1] * np.pi, self.z_unit_tensor[agent_env_ids])
    rand_rotation2 = quat_from_angle_axis(rand_floats[:, 2] * np.pi, self.z_unit_tensor[agent_env_ids])
    self.root_states[agent_env_ids] = self.initial_root_states[agent_env_ids]
    self.root_states[agent_env_ids, :2] = rand_pos
    self.root_states[agent_env_ids[1::2], 3:7] = rand_rotation[1::2]
    self.root_states[agent_env_ids[0::2], 3:7] = rand_rotation2[0::2]
    self.gym.set_actor_root_state_tensor_indexed(self.sim,
                                                 gymtorch.unwrap_tensor(self.root_states),
                                                 gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
    self.gym.set_dof_state_tensor_indexed(self.sim,
                                          gymtorch.unwrap_tensor(self.dof_state),
                                          gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
    # NOTE(review): pos_before is rebuilt for ALL envs here, not just env_ids.
    self.pos_before = self.root_states[0::2, :2].clone()
    self.progress_buf[env_ids] = 0
    self.reset_buf[env_ids] = 0
def pre_physics_step(self, actions):
    """Apply joint position targets for both agents before stepping physics.

    `actions` arrives as [num_envs * num_agents, num_actions] with agent 0's
    rows first and the opponent's rows second; the two halves are placed
    side by side to form per-env targets of shape
    [num_envs, num_agents * num_actions].
    """
    staged = actions.clone().to(self.device)
    ego_actions = staged[:self.num_envs]
    op_actions = staged[self.num_envs:]
    self.actions = torch.cat((ego_actions, op_actions), dim=-1)
    self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(self.actions))
def post_physics_step(self):
    """Advance per-env counters, then refresh observations and rewards."""
    self.progress_buf += 1
    self.randomize_buf += 1
    self.compute_observations()
    self.compute_reward(self.actions)
    # Cache agent 0's root xy (first two obs dims) for next step's move reward.
    self.pos_before = self.obs_buf[:self.num_envs, :2].clone()
def get_number_of_agents(self):
    """Report the number of learning agents exposed to the RL framework.

    Only the agent with index 0 is trained, so a single agent is reported.
    """
    return 1
def zero_actions(self) -> torch.Tensor:
    """Return an all-zero action buffer covering every agent in every env.

    Returns:
        A float32 tensor of shape [num_envs * num_agents, num_actions]
        allocated on the RL device.
    """
    shape = (self.num_envs * self.num_agents, self.num_actions)
    return torch.zeros(shape, dtype=torch.float32, device=self.rl_device)
def clear_count(self):
    """Decay the dense-reward weight and reset the win/draw tallies.

    The dense shaping reward is annealed by 10% per call so the sparse
    win/lose signal gradually dominates as training progresses.
    """
    self.dense_reward_scale *= 0.9
    self.extras['win'].zero_()
    self.extras['draw'].zero_()
#####################################################################
###=========================jit functions=========================###
#####################################################################
@torch.jit.script
def expand_env_ids(env_ids, n_agents):
    # type: (Tensor, int) -> Tensor
    """Expand env indices into the actor indices owned by those envs.

    Env e owns actors e * n_agents .. e * n_agents + n_agents - 1; the
    result lists them consecutively, in env order.
    """
    device = env_ids.device
    offsets = torch.arange(n_agents, device=device, dtype=torch.long)
    return (env_ids.view(-1, 1) * n_agents + offsets.view(1, -1)).reshape(-1)
@torch.jit.script
def compute_move_reward(
        pos,
        pos_before,
        target,
        dt,
        move_to_op_reward_scale
):
    # type: (Tensor,Tensor,Tensor,float,float) -> Tensor
    """Reward the velocity component that points from the previous position
    towards the target; moving away is clipped to zero (no penalty)."""
    velocity = (pos - pos_before) / dt
    to_target = target - pos_before
    to_target = to_target / torch.linalg.norm(to_target, dim=-1).view(-1, 1)
    projected_speed = torch.sum(velocity * to_target, dim=-1)
    return torch.clamp(projected_speed, min=0.0) * move_to_op_reward_scale
@torch.jit.script
def compute_ant_reward(
        obs_buf,
        obs_buf_op,
        reset_buf,
        progress_buf,
        pos_before,
        torques,
        hp,
        hp_op,
        termination_height,
        max_episode_length,
        borderline_space,
        draw_penalty_scale,
        win_reward_scale,
        move_to_op_reward_scale,
        stay_in_center_reward_scale,
        action_cost_scale,
        push_scale,
        joints_at_limit_cost_scale,
        dense_reward_scale,
        hp_decay_scale,
        dt,
):
    # type: (Tensor, Tensor, Tensor, Tensor,Tensor,Tensor,Tensor,Tensor,float, float,float, float,float,float,float,float,float,float,float,float,float) -> Tuple[Tensor, Tensor,Tensor,Tensor,Tensor,Tensor,Tensor]
    """Reward, reset and HP update for agent 0 of the ant sumo task.

    Returns (total_reward, reset, hp, hp_op, win, lose, draw).
    NOTE: hp and hp_op are mutated in place (and also returned).
    """
    # An agent loses HP while its torso height (obs dim 2) is below the
    # termination height.
    hp -= (obs_buf[:, 2] < termination_height) * hp_decay_scale
    hp_op -= (obs_buf_op[:, 2] < termination_height) * hp_decay_scale
    # An agent is "out" once its root xy leaves the circular arena or its HP
    # is exhausted.
    is_out = torch.sum(torch.square(obs_buf[:, 0:2]), dim=-1) >= borderline_space ** 2
    is_out_op = torch.sum(torch.square(obs_buf_op[:, 0:2]), dim=-1) >= borderline_space ** 2
    is_out = is_out | (hp <= 0)
    is_out_op = is_out_op | (hp_op <= 0)
    # reset agents: either agent out, or episode timeout
    tmp_ones = torch.ones_like(reset_buf)
    reset = torch.where(is_out, tmp_ones, reset_buf)
    reset = torch.where(is_out_op, tmp_ones, reset)
    reset = torch.where(progress_buf >= max_episode_length - 1, tmp_ones, reset)
    # Refill HP to 100 for envs that are about to be reset.
    hp = torch.where(reset > 0, tmp_ones * 100., hp)
    hp_op = torch.where(reset > 0, tmp_ones * 100., hp_op)
    # Sparse outcome terms: +win when the opponent is out, -win when ego is
    # out, and a (negative) draw penalty on timeout.
    win_reward = win_reward_scale * is_out_op
    lose_penalty = -win_reward_scale * is_out
    draw_penalty = torch.where(progress_buf >= max_episode_length - 1, tmp_ones * draw_penalty_scale,
                               torch.zeros_like(reset, dtype=torch.float))
    # Dense shaping: reward moving towards the opponent's current position.
    move_reward = compute_move_reward(obs_buf[:, 0:2], pos_before,
                                      obs_buf_op[:, 0:2], dt,
                                      move_to_op_reward_scale)
    # stay_in_center_reward = stay_in_center_reward_scale * torch.exp(-torch.linalg.norm(obs_buf[:, :2], dim=-1))
    # Obs dims 13:21 hold scaled dof positions in [-1, 1] (see
    # compute_ant_observations); count joints driven near their limits.
    dof_at_limit_cost = torch.sum(obs_buf[:, 13:21] > 0.99, dim=-1) * joints_at_limit_cost_scale
    # Larger (less negative) the farther the opponent is from the centre, so
    # pushing the opponent outwards is encouraged.
    push_reward = -push_scale * torch.exp(-torch.linalg.norm(obs_buf_op[:, :2], dim=-1))
    action_cost_penalty = torch.sum(torch.square(torques), dim=1) * action_cost_scale
    # Penalise idling: near-zero total torque gives a penalty close to -10.
    not_move_penalty = -10 * torch.exp(-torch.sum(torch.abs(torques), dim=1))
    dense_reward = move_reward + dof_at_limit_cost + push_reward + action_cost_penalty + not_move_penalty
    total_reward = win_reward + lose_penalty + draw_penalty + dense_reward * dense_reward_scale
    return total_reward, reset, hp, hp_op, is_out_op, is_out, progress_buf >= max_episode_length - 1
@torch.jit.script
def compute_ant_observations(
        root_states,
        root_states_op,
        dof_pos,
        dof_vel,
        dof_limits_lower,
        dof_limits_upper,
        dof_vel_scale,
        termination_height
):
    # type: (Tensor,Tensor,Tensor,Tensor,Tensor,Tensor,float,float)->Tensor
    """Build one agent's observation from its own and the opponent's state.

    Layout per env: own root state (13) | dof positions unscaled to [-1, 1]
    | scaled dof velocities | opponent root pos+rot (7) | own-minus-opponent
    xy offset (2) | own-fallen flag (1) | opponent-fallen flag (1).
    """
    dof_pos_scaled = unscale(dof_pos, dof_limits_lower, dof_limits_upper)
    obs = torch.cat(
        (root_states[:, :13], dof_pos_scaled, dof_vel * dof_vel_scale, root_states_op[:, :7],
         root_states[:, :2] - root_states_op[:, :2], torch.unsqueeze(root_states[:, 2] < termination_height, -1),
         torch.unsqueeze(root_states_op[:, 2] < termination_height, -1)), dim=-1)
    return obs
@torch.jit.script
def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor):
    """Compose two random axis rotations (about x and y) into one quaternion.

    rand0/rand1 are expected in [-1, 1], giving axis angles in [-pi, pi].
    """
    return quat_mul(quat_from_angle_axis(rand0 * np.pi, x_unit_tensor),
                    quat_from_angle_axis(rand1 * np.pi, y_unit_tensor))
================================================
FILE: timechamber/tasks/ma_humanoid_strike.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from asyncio import shield
from dis import dis
import torch
import math
from isaacgym import gymapi, gymtorch
from isaacgym.torch_utils import *
import timechamber.tasks.ase_humanoid_base.humanoid_amp_task as humanoid_amp_task
from timechamber.utils import torch_utils
class HumanoidStrike(humanoid_amp_task.HumanoidAMPTask):
    """Two-humanoid sword-and-shield strike task on the AMP humanoid base.

    Agent 0 ("ego") fights an opponent ("op"); the reward favours landing
    strikes on the opponent while staying upright and inside the square
    arena.  Heavy computation is delegated to the module-level jit-scripted
    functions below.
    """

    def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
        super().__init__(cfg=cfg,
                         sim_params=sim_params,
                         physics_engine=physics_engine,
                         device_type=device_type,
                         device_id=device_id,
                         headless=headless)
        # Cumulative contact-force "damage" dealt in each direction during the
        # current episode (reset in _reset_env_tensors).
        self.ego_to_op_damage = torch.zeros_like(self.reset_buf, device=self.device, dtype=torch.float)
        self.op_to_ego_damage = torch.zeros_like(self.reset_buf, device=self.device, dtype=torch.float)
        # Previous-step kinematics, used for finite-difference accelerations
        # in the reward.
        self._prev_root_pos = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float)
        self._prev_root_pos_op = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float)
        self._prev_body_ang_vel = torch.zeros([self.num_envs, self.num_bodies, 3],
                                              device=self.device, dtype=torch.float32)
        self._prev_body_vel = torch.zeros([self.num_envs, self.num_bodies, 3],
                                          device=self.device, dtype=torch.float32)
        # Rigid bodies whose motion earns strike reward, and bodies whose
        # contact forces count as damage — both configured in the task yaml.
        strike_body_names = cfg["env"]["strikeBodyNames"]
        self._strike_body_ids = self._build_body_ids_tensor(self.envs[0], self.humanoid_handles[0], strike_body_names)
        force_body_names = cfg["env"]["forceBodies"]
        self._force_body_ids = self._build_body_ids_tensor(self.envs[0], self.humanoid_handles[0], force_body_names)
        # Viewer-only setup: arena border lines and an initial camera pose.
        if self.viewer != None:
            for env in self.envs:
                self._add_rectangle_borderline(env)
            cam_pos = gymapi.Vec3(15.0, 0.0, 3.0)
            cam_target = gymapi.Vec3(10.0, 0.0, 0.0)
            self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)
        return

    def get_task_obs_size(self):
        """Size of the task-specific observation appended to the humanoid obs."""
        obs_size = 0
        if (self._enable_task_obs):
            obs_size = 50
        return obs_size

    def _create_envs(self, num_envs, spacing, num_per_row):
        """Delegate env creation to the AMP humanoid base class."""
        super()._create_envs(num_envs, spacing, num_per_row)
        return

    def _build_env(self, env_id, env_ptr, humanoid_asset, humanoid_asset_op):
        """Delegate per-env actor construction to the base class."""
        super()._build_env(env_id, env_ptr, humanoid_asset, humanoid_asset_op)
        return

    def _build_body_ids_tensor(self, env_ptr, actor_handle, body_names):
        """Resolve rigid-body names to indices for env 0's first humanoid.

        NOTE(review): the env_ptr/actor_handle arguments are immediately
        overwritten with envs[0]/humanoid_handles[0], so the ids always come
        from the first humanoid — intentional only if all actors share the
        same body ordering; confirm.
        """
        env_ptr = self.envs[0]
        actor_handle = self.humanoid_handles[0]
        body_ids = []
        for body_name in body_names:
            body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name)
            assert(body_id != -1)
            body_ids.append(body_id)
        body_ids = to_torch(body_ids, device=self.device, dtype=torch.long)
        return body_ids

    def _reset_actors(self, env_ids):
        """Reset dof state with noise and respawn both humanoids at mirrored
        random positions (angle vs. angle + pi) with independent random yaw."""
        positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=self.device)
        velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=self.device)
        self._dof_pos[env_ids] = tensor_clamp(self._initial_dof_pos[env_ids] + positions, self.dof_limits_lower,
                                              self.dof_limits_upper)
        self._dof_vel[env_ids] = velocities
        self._dof_pos_op[env_ids] = tensor_clamp(self._initial_dof_pos[env_ids] + positions, self.dof_limits_lower,
                                                 self.dof_limits_upper)
        self._dof_vel_op[env_ids] = velocities
        agent_env_ids = expand_env_ids(env_ids, 2)
        rand_angle = torch.rand((len(env_ids),), device=self.device) * math.pi * 2
        # Coordinate magnitudes drawn from (borderline_space - 2, borderline_space].
        rand_pos = torch.ones((len(agent_env_ids), 2), device=self.device) * (
                self.borderline_space * torch.ones((len(agent_env_ids), 2), device=self.device) - torch.rand(
            (len(agent_env_ids), 2), device=self.device) * 2)
        rand_pos[0::2, 0] *= torch.cos(rand_angle)
        rand_pos[0::2, 1] *= torch.sin(rand_angle)
        rand_pos[1::2, 0] *= torch.cos(rand_angle + math.pi)
        rand_pos[1::2, 1] *= torch.sin(rand_angle + math.pi)
        rand_floats = torch_rand_float(-1.0, 1.0, (len(agent_env_ids), 3), device=self.device)
        rand_rotation = quat_from_angle_axis(rand_floats[:, 1] * np.pi, self.z_unit_tensor[agent_env_ids])
        rand_rotation2 = quat_from_angle_axis(rand_floats[:, 2] * np.pi, self.z_unit_tensor[agent_env_ids])
        self._humanoid_root_states[agent_env_ids] = self._initial_humanoid_root_states[agent_env_ids]
        self._humanoid_root_states[agent_env_ids, :2] = rand_pos
        self._humanoid_root_states[agent_env_ids[1::2], 3:7] = rand_rotation[1::2]
        self._humanoid_root_states[agent_env_ids[0::2], 3:7] = rand_rotation2[0::2]
        return

    def _reset_env_tensors(self, env_ids):
        """Reset base-class env tensors plus the per-episode damage tallies."""
        super()._reset_env_tensors(env_ids)
        self.ego_to_op_damage[env_ids] = 0
        self.op_to_ego_damage[env_ids] = 0
        return

    def pre_physics_step(self, actions):
        """Delegate action application to the base class."""
        super().pre_physics_step(actions)
        # self._prev_root_pos[:] = self._humanoid_root_states[self.humanoid_indices, 0:3]
        # self._prev_root_pos_op[:] = self._humanoid_root_states[self.humanoid_indices_op, 0:3]
        return

    def post_physics_step(self):
        """Run the base post-step, then snapshot body velocities for the
        next step's finite-difference acceleration terms."""
        super().post_physics_step()
        self._prev_body_ang_vel[:] = self._rigid_body_ang_vel[:]
        self._prev_body_vel[:] = self._rigid_body_vel[:]

    def _compute_observations(self):
        """Fill obs_buf: humanoid obs (optionally + task obs) for both agents.

        Rows [:num_envs] belong to the ego agent, the rest to the opponent.
        """
        obs, obs_op = self._compute_humanoid_obs()
        if (self._enable_task_obs):
            task_obs, task_obs_op = self._compute_task_obs()
            obs = torch.cat([obs, task_obs], dim=-1)
            obs_op = torch.cat([obs_op, task_obs_op], dim=-1)
        self.obs_buf[:self.num_envs] = obs
        self.obs_buf[self.num_envs:] = obs_op
        return

    def _compute_task_obs(self):
        """Build the 50-dim task observation for both agents (roles swapped)."""
        body_pos = self._rigid_body_pos
        body_rot = self._rigid_body_rot
        body_vel = self._rigid_body_vel
        body_pos_op = self._rigid_body_pos_op
        body_rot_op = self._rigid_body_rot_op
        body_vel_op = self._rigid_body_vel_op
        # num_envs, 13
        root_states = self._humanoid_root_states[self.humanoid_indices]
        root_states_op = self._humanoid_root_states[self.humanoid_indices_op]
        obs = compute_strike_observations(root_states, root_states_op,
                                          body_pos, body_rot,
                                          body_pos_op, body_vel_op,
                                          borderline=self.borderline_space
                                          )
        obs_op = compute_strike_observations(root_states=root_states_op,
                                             root_states_op=root_states,
                                             body_pos=body_pos_op,
                                             body_rot=body_rot_op,
                                             body_pos_op=body_pos,
                                             body_vel_op=body_vel,
                                             borderline=self.borderline_space)
        return obs, obs_op

    def _compute_reward(self, actions):
        """Compute the ego reward and accumulate bidirectional damage tallies."""
        root_states = self._humanoid_root_states[self.humanoid_indices]
        root_states_op = self._humanoid_root_states[self.humanoid_indices_op]
        body_pos = self._rigid_body_pos
        body_vel = self._rigid_body_vel
        prev_body_vel = self._prev_body_vel
        body_ang_vel = self._rigid_body_ang_vel
        prev_body_ang_vel = self._prev_body_ang_vel
        contact_force = self._contact_forces
        body_pos_op = self._rigid_body_pos_op
        contact_force_op = self._contact_forces_op
        self.rew_buf[:], force_ego_to_op, force_op_to_ego = compute_strike_reward(root_states=root_states,
                                                                                  root_states_op=root_states_op,
                                                                                  body_pos=body_pos,
                                                                                  body_ang_vel=body_ang_vel,
                                                                                  prev_body_ang_vel=prev_body_ang_vel,
                                                                                  body_vel=body_vel,
                                                                                  prev_body_vel=prev_body_vel,
                                                                                  body_pos_op=body_pos_op,
                                                                                  force_body_ids=self._force_body_ids,
                                                                                  strike_body_ids=self._strike_body_ids,
                                                                                  contact_force=contact_force,
                                                                                  contact_force_op=contact_force_op,
                                                                                  contact_body_ids=self._contact_body_ids,
                                                                                  borderline=self.borderline_space,
                                                                                  termination_heights=self._termination_heights,
                                                                                  dt=self.dt)
        # Episode-long damage totals feed the win/lose decision in _compute_reset.
        self.ego_to_op_damage += force_ego_to_op
        self.op_to_ego_damage += force_op_to_ego
        return

    def _compute_reset(self):
        """Decide per-env reset/termination and the win/lose/draw outcome."""
        self.reset_buf[:], self._terminate_buf[:],\
            self.extras['win'], self.extras['lose'], self.extras['draw'] = \
            compute_humanoid_reset(self.reset_buf, self.progress_buf,
                                   self.ego_to_op_damage,
                                   self.op_to_ego_damage,
                                   self._contact_forces,
                                   self._contact_forces_op,
                                   self._contact_body_ids,
                                   self._rigid_body_pos,
                                   self._rigid_body_pos_op,
                                   self.max_episode_length,
                                   self._enable_early_termination,
                                   self._termination_heights,
                                   self.borderline_space)
        return
#####################################################################
###=========================jit functions=========================###
#####################################################################
@torch.jit.script
def compute_strike_observations(root_states, root_states_op, body_pos, body_rot,
                                body_pos_op, body_vel_op, borderline,
                                ):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor,Tensor,float) -> Tensor
    """Build the 50-dim task observation for one agent.

    All opponent quantities are expressed in frames local to the ego agent
    (heading, sword, or shield frame).  Body indices (6 = sword, 9 = shield,
    1 = torso, 2 = head, 3 = right upper arm, 11/14 = right/left thigh)
    follow the asset's rigid-body ordering — assumed from the variable
    names; confirm against the humanoid MJCF.
    """
    root_pos = root_states[:, 0:3]
    root_rot = root_states[:, 3:7]
    ego_sword_pos = body_pos[:, 6, :]
    ego_sword_rot = body_rot[:, 6, :]
    ego_shield_pos = body_pos[:, 9, :]
    ego_shield_rot = body_rot[:, 9, :]
    root_pos_op = root_states_op[:, 0:3]
    root_rot_op = root_states_op[:, 3:7]
    root_vel_op = root_states_op[:, 7:10]
    root_ang_op = root_states_op[:, 10:13]
    op_sword_pos = body_pos_op[:, 6, :]
    op_sword_vel = body_vel_op[:, 6, :]
    op_torso_pos = body_pos_op[:, 1, :]
    op_torso_vel = body_vel_op[:, 1, :]
    op_head_pos = body_pos_op[:, 2, :]
    op_head_vel = body_vel_op[:, 2, :]
    op_right_upper_arm_pos = body_pos_op[:, 3, :]
    op_right_thigh_pos = body_pos_op[:, 11, :]
    op_left_thigh_pos = body_pos_op[:, 14, :]
    ##*******************************************************##
    # Distance to the nearest arena wall along each axis (negative = outside).
    relative_x_1 = borderline - root_pos[:, 0]
    relative_x_2 = root_pos[:, 0] + borderline
    relative_x = torch.minimum(relative_x_1, relative_x_2)
    relative_x = torch.unsqueeze(relative_x, -1)
    relative_y_1 = borderline - root_pos[:, 1]
    relative_y_2 = root_pos[:, 1] + borderline
    relative_y = torch.minimum(relative_y_1, relative_y_2)
    relative_y = torch.unsqueeze(relative_y, -1)
    ##*******************************************************##
    # Heading-inverse quaternions used to localise world-frame quantities.
    heading_rot = torch_utils.calc_heading_quat_inv(root_rot)
    sword_rot = torch_utils.calc_heading_quat_inv(ego_sword_rot)
    shield_rot = torch_utils.calc_heading_quat_inv(ego_shield_rot)
    # Opponent root relative to ego, in ego's heading frame; the z component
    # keeps the opponent's absolute height rather than the height difference.
    local_op_relative_pos = root_pos_op - root_pos
    local_op_relative_pos[..., -1] = root_pos_op[..., -1]
    local_op_relative_pos = quat_rotate(heading_rot, local_op_relative_pos)
    local_op_vel = quat_rotate(heading_rot, root_vel_op)
    local_op_ang_vel = quat_rotate(heading_rot, root_ang_op)
    local_op_rot = quat_mul(heading_rot, root_rot_op)
    local_op_rot_obs = torch_utils.quat_to_tan_norm(local_op_rot)
    ##*******************************************************##
    # op sword relative ego position and vel
    local_op_relative_sword_pos = op_sword_pos - root_pos
    local_op_relative_sword_pos = quat_rotate(heading_rot, local_op_relative_sword_pos)
    local_op_sword_vel = quat_rotate(heading_rot, op_sword_vel)
    # op sword relative ego shield position and vel
    local_op_sword_shield_pos = op_sword_pos - ego_shield_pos
    local_op_sword_shield_pos = quat_rotate(shield_rot, local_op_sword_shield_pos)
    local_op_sword_shield_vel = quat_rotate(shield_rot, op_sword_vel)
    # relative position and vel of ego sword and op up body
    relative_sword_torso_pos = op_torso_pos - ego_sword_pos
    relative_sword_torso_pos = quat_rotate(sword_rot, relative_sword_torso_pos)
    relative_sword_torso_vel = quat_rotate(sword_rot, op_torso_vel)
    relative_sword_head_pos = op_head_pos - ego_sword_pos
    relative_sword_head_pos = quat_rotate(sword_rot, relative_sword_head_pos)
    relative_sword_head_vel = quat_rotate(sword_rot, op_head_vel)
    relative_sword_right_arm_pos = op_right_upper_arm_pos - ego_sword_pos
    relative_sword_right_arm_pos = quat_rotate(sword_rot, relative_sword_right_arm_pos)
    relative_sword_right_thigh_pos = op_right_thigh_pos - ego_sword_pos
    relative_sword_right_thigh_pos = quat_rotate(sword_rot, relative_sword_right_thigh_pos)
    relative_sword_left_thigh_pos = op_left_thigh_pos - ego_sword_pos
    relative_sword_left_thigh_pos = quat_rotate(sword_rot, relative_sword_left_thigh_pos)
    # Concatenation: 1+1 + 3+6 + 3+3 + 3+3 + 3+3 + 3+3 + 3+3 + 3+3+3 = 50 dims.
    obs = torch.cat([relative_x, relative_y,
                     local_op_relative_pos, local_op_rot_obs,
                     local_op_vel, local_op_ang_vel,
                     local_op_relative_sword_pos, local_op_sword_vel,
                     local_op_sword_shield_pos, local_op_sword_shield_vel,
                     relative_sword_torso_pos, relative_sword_torso_vel,
                     relative_sword_head_pos, relative_sword_head_vel,
                     relative_sword_right_arm_pos, relative_sword_right_thigh_pos,
                     relative_sword_left_thigh_pos
                     ], dim=-1)
    return obs
@torch.jit.script
def compute_strike_reward(root_states, root_states_op, body_pos, body_ang_vel,
                          prev_body_ang_vel, body_vel, prev_body_vel,
                          body_pos_op, force_body_ids, strike_body_ids,
                          contact_force, contact_force_op, contact_body_ids,
                          borderline, termination_heights, dt):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor,Tensor,Tensor,Tensor,float, Tensor, float) -> Tuple[Tensor, Tensor,Tensor]
    """Ego reward for the strike task.

    Returns (reward, force_ego_to_op, force_op_to_ego), where the two force
    terms are this step's summed contact-force magnitudes on the configured
    force bodies, which the caller accumulates as per-episode damage.
    Body indices (6 = sword, 9 = shield, 1 = torso, 11/14 = right/left
    thigh, 13 = right foot) are assumed from the variable names; confirm
    against the humanoid asset.
    """
    # Weights of the individual reward terms.
    op_fall_reward_w = 200.0
    ego_fall_out_reward_w = 50.0
    shield_to_sword_pos_reward_w = 1.0
    damage_reward_w = 8.0
    sword_to_op_reward_w = 0.8
    reward_energy_w = 3.0
    reward_strike_vel_acc_w = 3.0
    reward_face_w = 4.0
    reward_foot_to_op_w = 10.0
    reward_kick_w = 2.0
    num_envs = root_states.shape[0]
    # NOTE(review): this initial `reward` is dead — it is unconditionally
    # overwritten at the end (and allocated without a device).
    reward = torch.zeros((num_envs, 1), dtype=torch.float32)
    root_xy_pos = root_states[:, 0:2]
    root_pos = root_states[:, 0:3]
    ego_sword_pos = body_pos[:, 6, 0:3]
    ego_shield_pos = body_pos[:, 9, 0:3]
    ego_right_foot_pos = body_pos[:, 13, 0:3]
    op_sword_pos = body_pos_op[:, 6, 0:3]
    op_torse_pos = body_pos_op[:, 1, 0:3]
    op_right_thigh_pos = body_pos_op[:, 11, 0:3]
    op_left_thigh_pos = body_pos_op[:, 14, 0:3]
    root_pos_xy_op = root_states_op[:, 0:2]
    root_pos_xy = root_states[:, 0:2]
    root_pos_op = root_states_op[:, 0:3]
    root_rot = root_states[:, 3:7]
    root_rot_op = root_states_op[:, 3:7]
    up = torch.zeros_like(root_pos_op)
    up[..., -1] = 1
    contact_buf = contact_force.clone()
    contact_buf_op = contact_force_op.clone()
    ##*****************r energy******************##
    # Reward fast strike-body motion, gated by proximity (k_dist ~ 1 when the
    # opponents are within ~2 m of each other, decaying quickly beyond).
    strike_body_vel = body_vel[:, strike_body_ids, :]
    strike_body_vel_norm = torch.sum(torch.norm(strike_body_vel, dim=-1), dim=1)
    strike_body_vel_norm = torch.clamp(strike_body_vel_norm, max=20)
    distance = root_pos_xy_op - root_xy_pos
    distance = torch.norm(distance, dim=-1)
    zeros = torch.zeros_like(distance)
    k_dist = torch.exp(-10 * torch.maximum(zeros, distance - 2.0))
    r_energy = k_dist * strike_body_vel_norm
    r_energy = r_energy / 20.
    # Finite-difference strike-body acceleration, same proximity gating.
    strike_vel_dfff = body_vel[:, strike_body_ids, :] - prev_body_vel[:, strike_body_ids, :]
    strike_vel_acc = strike_vel_dfff / dt
    strike_vel_acc = torch.sum(torch.norm(strike_vel_acc, dim=-1), dim=1)
    strike_vel_acc = torch.clamp(strike_vel_acc, max=1000)
    strike_vel_acc = k_dist * strike_vel_acc / 500
    r_strike_vel_acc = strike_vel_acc
    ##*****************r damage******************##
    # Net damage this step: force dealt minus twice the force received,
    # floored at -200 and rescaled.
    ego_to_op_force = contact_buf_op[:, force_body_ids, :]
    op_to_ego_force = contact_buf[:, force_body_ids, :]
    force_ego_to_op = torch.norm(ego_to_op_force, dim=2).sum(dim=1)
    force_op_to_ego = torch.norm(op_to_ego_force, dim=2).sum(dim=1)
    r_damage = force_ego_to_op - force_op_to_ego * 2
    r_damage = torch.clamp(r_damage, min=-200.)
    r_damage /= 100
    ##*****************r kick******************##
    # Reward the right foot approaching the opponent's torso; saturates at 1
    # once within 0.1 m.
    ego_foot_op_torse_distance = op_torse_pos - ego_right_foot_pos
    ego_foot_op_torse_err = torch.norm(ego_foot_op_torse_distance, dim=-1)
    succ_foot = ego_foot_op_torse_err < 0.1
    r_foot_to_op = torch.exp(-0.5 * ego_foot_op_torse_err)
    constant_r = torch.ones_like(r_foot_to_op)
    r_foot_to_op = torch.where(succ_foot, constant_r, r_foot_to_op)
    # Reward lifting the kicking foot; capped at 1 from height 0.4 upward.
    foot_height = ego_right_foot_pos[..., 2]
    succ_kick = foot_height >= 0.4
    zeros = torch.zeros_like(succ_kick)  # NOTE(review): unused
    constant_r_kick = torch.ones_like(succ_kick)
    r_kick = torch.where(succ_kick, constant_r_kick, foot_height)
    ##*****************r close******************##
    # sword -> torso
    # Reward the ego sword being close to the opponent's torso, thighs and
    # sword (squared-distance Gaussians).
    pos_err_scale1 = 1.0
    pos_err_scale2 = 2.0
    sword_torse_distance = op_torse_pos - ego_sword_pos
    sword_torse_err = torch.sum(sword_torse_distance * sword_torse_distance, dim=-1)
    sword_right_thigh_distance = op_right_thigh_pos - ego_sword_pos
    sword_right_thigh_err = torch.sum(sword_right_thigh_distance * sword_right_thigh_distance, dim=-1)
    sword_left_thigh_distance = op_left_thigh_pos - ego_sword_pos
    sword_left_thigh_err = torch.sum(sword_left_thigh_distance * sword_left_thigh_distance, dim=-1)
    sword_sword_distance = op_sword_pos - ego_sword_pos
    sword_sword_err = torch.sum(sword_sword_distance * sword_sword_distance, dim=-1)
    # zeros = torch.zeros_like(sword_torse_distance)
    r_close = torch.exp(-pos_err_scale1 * sword_torse_err)  # -> [0, 1]
    r_close += torch.exp(-pos_err_scale1 * sword_right_thigh_err)
    r_close += torch.exp(-pos_err_scale1 * sword_left_thigh_err)
    r_close += torch.exp(-pos_err_scale2 * sword_sword_err)
    ##*****************r shelid with op sword******************##
    # Reward keeping the shield near the opponent's sword (blocking).
    pos_err_scale3 = 2.0
    ego_shield_op_sword_distance = op_sword_pos - ego_shield_pos
    ego_shield_op_sword_err = torch.sum(ego_shield_op_sword_distance * ego_shield_op_sword_distance, dim=-1)
    r_shield_to_sword = torch.exp(-pos_err_scale3 * ego_shield_op_sword_err)
    ##*****************r face******************##
    # Reward facing the opponent: dot product between the heading direction
    # and the unit vector towards the opponent, clipped at 0.
    tar_dir = root_pos_xy_op - root_xy_pos
    tar_dir = torch.nn.functional.normalize(tar_dir, dim=-1)
    heading_rot = torch_utils.calc_heading_quat(root_rot)
    facing_dir = torch.zeros_like(root_pos)
    facing_dir[..., 0] = 1.0
    facing_dir = quat_rotate(heading_rot, facing_dir)
    facing_err = torch.sum(tar_dir * facing_dir[..., 0:2], dim=-1)
    facing_reward = torch.clamp_min(facing_err, 0.0)
    ##*****************r op fall******************##
    # Opponent counts as fallen when a non-contact body both touches
    # something and is below its termination height.
    masked_contact_buf_op = contact_buf_op.clone()
    masked_contact_buf_op[:, contact_body_ids, :] = 0
    fall_contact_op = torch.any(torch.abs(masked_contact_buf_op) > 0.1, dim=-1)
    fall_contact_op = torch.any(fall_contact_op, dim=-1)
    body_height_op = body_pos_op[..., 2]
    fall_height_op = body_height_op < termination_heights
    fall_height_op[:, contact_body_ids] = False
    fall_height_op = torch.any(fall_height_op, dim=-1)
    has_fallen_op = torch.logical_and(fall_contact_op, fall_height_op)
    # Reward tilting the opponent away from upright; maxed out once fallen.
    op_up = quat_rotate(root_rot_op, up)
    op_rot_err = torch.sum(up * op_up, dim=-1)
    op_rot_r = 0.6 * torch.clamp_min(1.0 - op_rot_err, 0.0)  # -> [0, 1] succ = op_rot_err < 0.2
    op_rot_r = torch.where(has_fallen_op, torch.ones_like(op_rot_r), op_rot_r)
    # test, when op fall, then r_close = 0 to encourage to agents separate.
    r_separate = torch.norm((root_pos_xy_op - root_pos_xy), dim=-1)
    r_separate = torch.where(r_separate > 0.1, r_separate, torch.zeros_like(r_separate))
    r_close = torch.where(has_fallen_op, r_separate, r_close)
    r_shield_to_sword = torch.where(has_fallen_op, torch.zeros_like(r_shield_to_sword), r_shield_to_sword)
    ##*****************r penalty******************##
    # Penalise leaving the square arena or falling over.
    relative_x_1 = borderline - root_xy_pos[:, 0]
    relative_x_2 = root_xy_pos[:, 0] + borderline
    relative_x = torch.minimum(relative_x_1, relative_x_2)
    relative_x = relative_x < 0
    relative_y_1 = borderline - root_xy_pos[:, 1]
    relative_y_2 = root_xy_pos[:, 1] + borderline
    relative_y = torch.minimum(relative_y_1, relative_y_2)
    relative_y = relative_y < 0
    is_out = relative_x | relative_y
    r_penalty = is_out * 1.0
    masked_contact_buf = contact_force.clone()
    masked_contact_buf[:, contact_body_ids, :] = 0
    fall_contact = torch.any(torch.abs(masked_contact_buf) > 0.1, dim=-1)
    fall_contact = torch.any(fall_contact, dim=-1)
    body_height = body_pos[..., 2]
    fall_height = body_height < termination_heights
    fall_height[:, contact_body_ids] = False
    fall_height = torch.any(fall_height, dim=-1)
    has_fallen_ego = torch.logical_and(fall_contact, fall_height)
    r_penalty += has_fallen_ego * 1.0
    ##*****************total******************##
    # Weighted sum of all terms.
    reward = -r_penalty * ego_fall_out_reward_w + op_rot_r * op_fall_reward_w + \
        r_shield_to_sword * shield_to_sword_pos_reward_w + r_close * sword_to_op_reward_w +\
        r_damage * damage_reward_w + r_energy * reward_energy_w + facing_reward * reward_face_w + \
        r_strike_vel_acc * reward_strike_vel_acc_w + r_foot_to_op * reward_foot_to_op_w +\
        r_kick * reward_kick_w
    return reward, force_ego_to_op, force_op_to_ego
# Episode reset/termination logic for the two-player humanoid strike task.
# NOTE(review): the per-agent fall flags computed below (fall_contact*/fall_height*)
# are currently unused in the termination decision — only leaving the arena
# ("ring out") terminates early. Confirm whether falls were meant to count too.
@torch.jit.script
def compute_humanoid_reset(reset_buf, progress_buf, ego_to_op_damage, op_to_ego_damage,
                           contact_buf, contact_buf_op, contact_body_ids,
                           rigid_body_pos, rigid_body_pos_op, max_episode_length,
                           enable_early_termination, termination_heights, borderline):
    # type: (Tensor, Tensor, Tensor, Tensor,Tensor, Tensor, Tensor, Tensor, Tensor, float, bool, Tensor, float) -> Tuple[Tensor, Tensor,Tensor,Tensor,Tensor]
    terminated = torch.zeros_like(reset_buf)
    if (enable_early_termination):
        # Zero out contact forces on bodies that are allowed to touch the
        # ground (e.g. feet) before testing for a fall.
        masked_contact_buf = contact_buf.clone()
        masked_contact_buf_op = contact_buf_op.clone()
        masked_contact_buf[:, contact_body_ids, :] = 0
        masked_contact_buf_op[:, contact_body_ids, :] = 0
        # A "fall" requires a non-allowed body in contact ...
        fall_contact = torch.any(torch.abs(masked_contact_buf) > 0.1, dim=-1)
        fall_contact = torch.any(fall_contact, dim=-1)
        fall_contact_op = torch.any(torch.abs(masked_contact_buf_op) > 0.1, dim=-1)
        fall_contact_op = torch.any(fall_contact_op, dim=-1)
        # ... and a non-allowed body below its termination height.
        body_height = rigid_body_pos[..., 2]
        body_height_op = rigid_body_pos_op[..., 2]
        fall_height = body_height < termination_heights
        fall_height_op = body_height_op < termination_heights
        fall_height[:, contact_body_ids] = False
        fall_height_op[:, contact_body_ids] = False
        fall_height = torch.any(fall_height, dim=-1)
        fall_height_op = torch.any(fall_height_op, dim=-1)
        ## out area
        # Either root leaving the square arena |x| <= borderline, |y| <= borderline
        # terminates the episode.
        root_pos = rigid_body_pos[:, 0, 0:2]
        root_pos_op = rigid_body_pos_op[:, 0, 0:2]
        relative_x_1 = borderline - root_pos[:, 0]
        relative_x_2 = root_pos[:, 0] + borderline
        relative_x = torch.minimum(relative_x_1, relative_x_2)
        relative_x = relative_x < 0
        relative_y_1 = borderline - root_pos[:, 1]
        relative_y_2 = root_pos[:, 1] + borderline
        relative_y = torch.minimum(relative_y_1, relative_y_2)
        relative_y = relative_y < 0
        is_out_ego = relative_x | relative_y
        relative_x_1_op = borderline - root_pos_op[:, 0]
        relative_x_2_op = root_pos_op[:, 0] + borderline
        relative_x_op = torch.minimum(relative_x_1_op, relative_x_2_op)
        relative_x_op = relative_x_op < 0
        relative_y_1_op = borderline - root_pos_op[:, 1]
        relative_y_2_op = root_pos_op[:, 1] + borderline
        relative_y_op = torch.minimum(relative_y_1_op, relative_y_2_op)
        relative_y_op = relative_y_op < 0
        is_out_op = relative_x_op | relative_y_op
        is_out = is_out_ego | is_out_op
        has_failed = is_out
        # first timestep can sometimes still have nonzero contact forces
        # so only check after first couple of steps
        has_failed *= (progress_buf > 1)
        terminated = torch.where(has_failed, torch.ones_like(reset_buf), terminated)
    damage_ego_more_than_op = ego_to_op_damage > op_to_ego_damage
    damage_op_more_than_ego = op_to_ego_damage > ego_to_op_damage
    # Force a reset at the episode time limit; earlier resets come from termination.
    reset = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(reset_buf), terminated)
    # Match outcome at reset time is decided by accumulated damage.
    win = torch.where(reset, damage_ego_more_than_op, torch.zeros_like(reset_buf, dtype=torch.bool))
    lose = torch.where(reset, damage_op_more_than_ego, torch.zeros_like(reset_buf, dtype=torch.bool))
    draw = torch.where(reset, ego_to_op_damage == op_to_ego_damage, torch.zeros_like(reset_buf, dtype=torch.bool))
    return reset, terminated, win, lose, draw
# Expand per-environment ids to per-agent ids: env e, agent a -> e * n_agents + a,
# ordered as [e0a0, e0a1, ..., e1a0, e1a1, ...].
@torch.jit.script
def expand_env_ids(env_ids, n_agents):
    # type: (Tensor, int) -> Tensor
    agent_offsets = torch.arange(n_agents, device=env_ids.device, dtype=torch.long)
    expanded = env_ids.unsqueeze(-1) * n_agents + agent_offsets.unsqueeze(0)
    return expanded.reshape(-1)
================================================
FILE: timechamber/train.py
================================================
# train.py
# Script to train policies in Isaac Gym
#
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import datetime
from statistics import mode
import isaacgym
import os
import hydra
import yaml
from omegaconf import DictConfig, OmegaConf
from hydra.utils import to_absolute_path
import gym
from timechamber.utils.reformat import omegaconf_to_dict, print_dict
from timechamber.utils.utils import set_np_formatting, set_seed
from timechamber.utils.rlgames_utils import RLGPUEnv, RLGPUAlgoObserver, get_rlgames_env_creator
from rl_games.common import env_configurations, vecenv
from rl_games.torch_runner import Runner
from rl_games.algos_torch import model_builder
from timechamber.ase import ase_agent
from timechamber.ase import ase_models
from timechamber.ase import ase_network_builder
from timechamber.ase import hrl_models
from timechamber.ase import hrl_network_builder
from timechamber.learning import ppo_sp_agent
from timechamber.learning import hrl_sp_agent
from timechamber.learning import ppo_sp_player
from timechamber.learning import hrl_sp_player
from timechamber.learning import vectorized_models
from timechamber.learning import vectorized_network_builder
import timechamber
## OmegaConf & Hydra Config
# Resolvers used in hydra configs (see https://omegaconf.readthedocs.io/en/2.1_branch/usage.html#resolvers)
@hydra.main(config_name="config", config_path="./cfg")
def launch_rlg_hydra(cfg: DictConfig):
    """Hydra entry point: build the rl-games runner and train/play per cfg.

    Registers the self-play / ASE / HRL agents, players, models and networks
    with rl-games, creates the vectorized Isaac Gym environment, dumps the
    resolved config under runs/<name>/, then trains (or evaluates when
    cfg.test is set).
    """
    time_str = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    run_name = f"{cfg.wandb_name}_{time_str}"

    # ensure checkpoints can be specified as relative paths
    if cfg.checkpoint:
        cfg.checkpoint = to_absolute_path(cfg.checkpoint)

    cfg_dict = omegaconf_to_dict(cfg)
    print_dict(cfg_dict)

    # set numpy formatting for printing only
    set_np_formatting()

    # Rank of this process when launched via torchrun; 0 for single-GPU runs.
    rank = int(os.getenv("LOCAL_RANK", "0"))
    if cfg.multi_gpu:
        # torchrun --standalone --nnodes=1 --nproc_per_node=2 train.py
        cfg.sim_device = f'cuda:{rank}'
        cfg.rl_device = f'cuda:{rank}'
        # give every worker a distinct seed
        cfg.seed += rank
    # sets seed. if seed is -1 will pick a random one
    cfg.seed = set_seed(cfg.seed, torch_deterministic=cfg.torch_deterministic, rank=rank)

    # BUGFIX: wandb was initialized twice (once with name=, once again later
    # with id=), producing a duplicate/reset run. Initialize exactly once.
    if cfg.wandb_activate and rank == 0:
        # Make sure to install WandB if you actually use this.
        import wandb
        wandb.init(
            project=cfg.wandb_project,
            group=cfg.wandb_group,
            entity=cfg.wandb_entity,
            config=cfg_dict,
            sync_tensorboard=True,
            name=run_name,
            resume="allow",
            monitor_gym=True,
        )

    def create_env_thunk(**kwargs):
        # Called lazily by rl-games to build the (optionally recorded) env.
        envs = timechamber.make(
            cfg.seed,
            cfg.task_name,
            cfg.task.env.numEnvs,
            cfg.sim_device,
            cfg.rl_device,
            cfg.graphics_device_id,
            cfg.device_type,
            cfg.headless,
            cfg.multi_gpu,
            cfg.capture_video,
            cfg.force_render,
            cfg,
            **kwargs,
        )
        if cfg.capture_video:
            envs.is_vector_env = True
            envs = gym.wrappers.RecordVideo(
                envs,
                f"videos/{run_name}",
                step_trigger=lambda step: step % cfg.capture_video_freq == 0,
                video_length=cfg.capture_video_len,
            )
        return envs

    # register the rl-games adapter to use inside the runner
    vecenv.register('RLGPU',
                    lambda config_name, num_actors, **kwargs: RLGPUEnv(config_name, num_actors, **kwargs))
    env_configurations.register('rlgpu', {
        'vecenv_type': 'RLGPU',
        'env_creator': create_env_thunk,
    })

    # register new AMP network builder and agent
    def build_runner(algo_observer):
        # Wire the project's self-play / ASE / HRL components into rl-games.
        runner = Runner(algo_observer)
        runner.algo_factory.register_builder('self_play_continuous', lambda **kwargs: ppo_sp_agent.SPAgent(**kwargs))
        runner.algo_factory.register_builder('self_play_hrl', lambda **kwargs: hrl_sp_agent.HRLSPAgent(**kwargs))
        runner.algo_factory.register_builder('ase', lambda **kwargs: ase_agent.ASEAgent(**kwargs))
        runner.player_factory.register_builder('self_play_continuous',
                                               lambda **kwargs: ppo_sp_player.SPPlayer(**kwargs))
        runner.player_factory.register_builder('self_play_hrl',
                                               lambda **kwargs: hrl_sp_player.HRLSPPlayer(**kwargs))
        model_builder.register_model('hrl', lambda network, **kwargs: hrl_models.ModelHRLContinuous(network))
        model_builder.register_model('ase', lambda network, **kwargs: ase_models.ModelASEContinuous(network))
        model_builder.register_model('vectorized_a2c',
                                     lambda network, **kwargs: vectorized_models.ModelVectorizedA2C(network))
        model_builder.register_network('vectorized_a2c',
                                       lambda **kwargs: vectorized_network_builder.VectorizedA2CBuilder())
        model_builder.register_network('ase', lambda **kwargs: ase_network_builder.ASEBuilder())
        model_builder.register_network('hrl', lambda **kwargs: hrl_network_builder.HRLBuilder())
        return runner

    rlg_config_dict = omegaconf_to_dict(cfg.train)

    # create runner and set the settings
    runner = build_runner(RLGPUAlgoObserver())
    runner.load(rlg_config_dict)
    runner.reset()

    # dump config dict
    experiment_dir = os.path.join('runs', cfg.train.params.config.name)
    os.makedirs(experiment_dir, exist_ok=True)
    with open(os.path.join(experiment_dir, 'config.yaml'), 'w') as f:
        f.write(OmegaConf.to_yaml(cfg))

    runner.run({
        'train': not cfg.test,
        'play': cfg.test,
        'checkpoint': cfg.checkpoint,
        'sigma': None
    })

    # NOTE: rank was already determined from LOCAL_RANK above; the previous
    # horovod-based re-computation was inconsistent with the torchrun launch.
    if cfg.wandb_activate and rank == 0:
        wandb.finish()


if __name__ == "__main__":
    launch_rlg_hydra()
================================================
FILE: timechamber/utils/config.py
================================================
import os
import sys
import yaml
from isaacgym import gymapi
from isaacgym import gymutil
import numpy as np
import random
import torch
SIM_TIMESTEP = 1.0 / 60.0


def parse_sim_params(args, cfg):
    """Build a gymapi.SimParams object from CLI args and the task config.

    Engine-specific defaults are set first, then any 'sim' section in cfg
    overrides them, and finally --num_threads from the command line wins
    for PhysX.
    """
    # initialize sim
    sim_params = gymapi.SimParams()
    sim_params.dt = SIM_TIMESTEP
    sim_params.num_client_threads = args.num_subscenes

    if args.physics_engine == "flex":
        if args.device_type != "cpu":
            print("WARNING: Using Flex with GPU instead of PHYSX!")
        sim_params.flex.shape_collision_margin = 0.01
        sim_params.flex.num_outer_iterations = 4
        sim_params.flex.num_inner_iterations = 10
    elif args.physics_engine == "physx":
        sim_params.physx.solver_type = 1
        sim_params.physx.num_position_iterations = 4
        sim_params.physx.num_velocity_iterations = 0
        sim_params.physx.num_threads = 4
        # FIX: use_gpu was assigned twice in the original; once is enough.
        sim_params.physx.use_gpu = args.use_gpu
        sim_params.physx.num_subscenes = args.num_subscenes
        sim_params.physx.max_gpu_contact_pairs = 8 * 1024 * 1024
        sim_params.use_gpu_pipeline = args.use_gpu_pipeline

    # if sim options are provided in cfg, parse them and update/override above:
    if "sim" in cfg:
        gymutil.parse_sim_config(cfg["sim"], sim_params)

    # Override num_threads if passed on the command line
    if args.physics_engine == "physx" and args.num_threads > 0:
        sim_params.physx.num_threads = args.num_threads

    return sim_params
================================================
FILE: timechamber/utils/gym_util.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from timechamber.utils import logger
from isaacgym import gymapi
import numpy as np
import torch
from isaacgym.torch_utils import *
from isaacgym import gymtorch
def setup_gym_viewer(config):
    """Create the gym handle, the simulation, and (optionally) a viewer."""
    gym_handle = initialize_gym(config)
    sim_handle, viewer_handle = configure_gym(gym_handle, config)
    return gym_handle, sim_handle, viewer_handle
def initialize_gym(config):
    """Acquire the gym API singleton, aborting the process if it fails."""
    gym = gymapi.acquire_gym()
    if gym.initialize():
        return gym
    logger.warn("*** Failed to initialize gym")
    quit()
def configure_gym(gym, config):
    """Create the simulation (and a viewer when rendering) per config.

    Returns (sim, viewer); viewer is None when not rendering.
    """
    engine, render = config['engine'], config['render']

    # physics engine settings
    if(engine == 'FLEX'):
        sim_engine = gymapi.SIM_FLEX
    elif(engine == 'PHYSX'):
        sim_engine = gymapi.SIM_PHYSX
    else:
        logger.warn("Uknown physics engine. defaulting to FLEX")
        sim_engine = gymapi.SIM_FLEX

    # gym viewer
    if render:
        # create viewer
        sim = gym.create_sim(0, 0, sim_type=sim_engine)
        viewer = gym.create_viewer(
            sim, int(gymapi.DEFAULT_VIEWER_WIDTH / 1.25),
            int(gymapi.DEFAULT_VIEWER_HEIGHT / 1.25)
        )
        if viewer is None:
            logger.warn("*** Failed to create viewer")
            quit()
        # enable left mouse click or space bar for throwing projectiles
        if config['add_projectiles']:
            gym.subscribe_viewer_mouse_event(viewer, gymapi.MOUSE_LEFT_BUTTON, "shoot")
            gym.subscribe_viewer_keyboard_event(viewer, gymapi.KEY_SPACE, "shoot")
    else:
        # Headless: graphics device -1, no viewer.
        sim = gym.create_sim(0, -1)
        viewer = None

    # simulation params
    scene_config = config['env']['scene']
    sim_params = gymapi.SimParams()
    sim_params.solver_type = scene_config['SolverType']
    sim_params.num_outer_iterations = scene_config['NumIterations']
    sim_params.num_inner_iterations = scene_config['NumInnerIterations']
    sim_params.relaxation = scene_config.get('Relaxation', 0.75)
    sim_params.warm_start = scene_config.get('WarmStart', 0.25)
    sim_params.geometric_stiffness = scene_config.get('GeometricStiffness', 1.0)
    sim_params.shape_collision_margin = 0.01
    # NOTE(review): gravity is along -Y here (Flex-style axis convention),
    # while other tasks in this repo are Z-up — confirm this is intended.
    sim_params.gravity = gymapi.Vec3(0.0, -9.8, 0.0)
    gym.set_sim_params(sim, sim_params)
    return sim, viewer
def parse_states_from_reference_states(reference_states, progress):
    """Extract (rotation, translation, velocity, angular velocity) at the
    requested frames of a DeepMimicState, as float64 CUDA tensors."""
    def _as_cuda_double(field):
        # Index by frame, then move the slice to the GPU in double precision.
        return torch.tensor(field[(progress,)].numpy(), dtype=torch.double).cuda()

    global_quats_ref = _as_cuda_double(reference_states._global_rotation)
    ts_ref = _as_cuda_double(reference_states._translation)
    vels_ref = _as_cuda_double(reference_states._velocity)
    avels_ref = _as_cuda_double(reference_states._angular_velocity)
    return global_quats_ref, ts_ref, vels_ref, avels_ref
def parse_states_from_reference_states_with_motion_id(precomputed_state,
                                                      progress, motion_id):
    """Look up reference states by (motion id, per-motion frame) pairs."""
    assert len(progress) == len(motion_id)
    # Per-motion frame index -> flat index into the concatenated buffers,
    # clamped to the last valid frame.
    flat_id = precomputed_state['motion_offset'][motion_id] + progress
    flat_id = np.minimum(flat_id,
                         precomputed_state['global_quats_ref'].shape[0] - 1)
    fields = ('global_quats_ref', 'ts_ref', 'vels_ref', 'avels_ref')
    quats, ts, vels, avels = (precomputed_state[name][flat_id] for name in fields)
    return quats, ts, vels, avels
def parse_dof_state_with_motion_id(precomputed_state, dof_state,
                                   progress, motion_id):
    """Index dof_state rows by (motion id, per-motion frame) pairs."""
    assert len(progress) == len(motion_id)
    flat_id = precomputed_state['motion_offset'][motion_id] + progress
    # NOTE: episodes terminate two steps early, so the index should never
    # reach dof_state.shape[0]; the clamp below is purely defensive.
    flat_id = np.minimum(flat_id, dof_state.shape[0] - 1)
    return dof_state[flat_id]
def get_flatten_ids(precomputed_state):
    """Per-motion frame ids, motion ids, and running global frame ids.

    Given N+1 cumulative motion offsets, returns three parallel arrays of the
    total frame count: the frame index within its motion, the motion index,
    and the 0-based index into the concatenated frame buffer.
    """
    offsets = precomputed_state['motion_offset']
    state_ids, motion_ids, global_ids = [], [], []
    total = 0
    for motion in range(len(offsets) - 1):
        length = offsets[motion + 1] - offsets[motion]
        state_ids.extend(range(length))
        motion_ids.extend([motion] * length)
        global_ids.extend(range(total, total + length))
        total += length
    return np.array(state_ids), np.array(motion_ids), np.array(global_ids)
def parse_states_from_reference_states_with_global_id(precomputed_state,
                                                      global_id):
    """Look up reference states by flat frame id, wrapping around the buffer."""
    wrapped = global_id % precomputed_state['global_quats_ref'].shape[0]
    return (precomputed_state['global_quats_ref'][wrapped],
            precomputed_state['ts_ref'][wrapped],
            precomputed_state['vels_ref'][wrapped],
            precomputed_state['avels_ref'][wrapped])
def get_robot_states_from_torch_tensor(config, ts, global_quats, vels, avels,
                                       init_rot, progress, motion_length=-1,
                                       actions=None, relative_rot=None,
                                       motion_id=None, num_motion=None,
                                       motion_onehot_matrix=None):
    """Assemble the per-env observation vector from reference-state tensors.

    The base observation is [torso height, flattened joint obs]; optional
    segments (previous action, relative orientation, normalized frame
    progress, motion id or its one-hot) are appended per the config flags.

    Returns (obs, info) where info carries the root-yaw inverse rotation.
    """
    # NOTE(review): `quaternion_math` is not imported in this module's visible
    # import block — confirm where it is expected to come from.
    info = {}
    # the observation with quaternion-based representation
    torso_height = ts[..., 0, 1].cpu().numpy()
    gttrny, gqny, vny, avny, info['root_yaw_inv'] = \
        quaternion_math.compute_observation_return_info(global_quats, ts,
                                                        vels, avels)
    joint_obs = np.concatenate([gttrny.cpu().numpy(), gqny.cpu().numpy(),
                                vny.cpu().numpy(), avny.cpu().numpy()], axis=-1)
    joint_obs = joint_obs.reshape(joint_obs.shape[0], -1)
    num_envs = joint_obs.shape[0]
    obs = np.concatenate([torso_height[:, np.newaxis], joint_obs], -1)

    # the previous action
    if config['env_action_ob']:
        obs = np.concatenate([obs, actions], axis=-1)

    # the orientation (drift relative to the initial rotation)
    if config['env_orientation_ob']:
        if relative_rot is not None:
            obs = np.concatenate([obs, relative_rot], axis=-1)
        else:
            curr_rot = global_quats[np.arange(num_envs)][:, 0]
            curr_rot = curr_rot.reshape(num_envs, -1, 4)
            relative_rot = quaternion_math.compute_orientation_drift(
                init_rot, curr_rot
            ).cpu().numpy()
            obs = np.concatenate([obs, relative_rot], axis=-1)

    if config['env_frame_ob']:
        # BUGFIX: np.float was removed in NumPy 1.24; it was an alias for
        # np.float64, which is used explicitly here.
        if isinstance(motion_length, np.ndarray):
            motion_length = motion_length.astype(np.float64)
            progress_ob = np.expand_dims(progress.astype(np.float64) /
                                         motion_length, axis=-1)
        else:
            progress_ob = np.expand_dims(progress.astype(np.float64) /
                                         float(motion_length), axis=-1)
        obs = np.concatenate([obs, progress_ob], axis=-1)

    if config['env_motion_ob'] and not config['env_motion_ob_onehot']:
        # Scalar motion id, normalized by the number of motions.
        motion_id_ob = np.expand_dims(motion_id.astype(np.float64) /
                                      float(num_motion), axis=-1)
        obs = np.concatenate([obs, motion_id_ob], axis=-1)
    elif config['env_motion_ob'] and config['env_motion_ob_onehot']:
        motion_id_ob = motion_onehot_matrix[motion_id]
        obs = np.concatenate([obs, motion_id_ob], axis=-1)

    return obs, info
def get_xyzoffset(start_ts, end_ts, root_yaw_inv):
    """Root displacement between two frames, rotated into the yaw-invariant frame."""
    root_delta = (end_ts - start_ts)[:, [0], :].reshape(1, -1, 1, 3)
    yaw_inv_q = root_yaw_inv.reshape(1, -1, 1, 4)
    calibrated = quaternion_math.quat_apply(yaw_inv_q, root_delta)
    return calibrated[0, :, 0, :]
================================================
FILE: timechamber/utils/logger.py
================================================
# -----------------------------------------------------------------------------
# @brief:
# The logger here will be called all across the project. It is inspired
# by Yuxin Wu (ppwwyyxx@gmail.com)
#
# @author:
# Tingwu Wang, 2017, Feb, 20th
# -----------------------------------------------------------------------------
import logging
import sys
import os
import datetime
from termcolor import colored
__all__ = ['set_file_handler']  # the actual worker is the '_logger'


class _MyFormatter(logging.Formatter):
    '''
    @brief:
        Colorize log records: green [time @file:line] prefix plus a red
        WRN/ERR tag for warnings and errors.
    '''

    def format(self, record):
        prefix = colored('[%(asctime)s @%(filename)s:%(lineno)d]', 'green')
        body = '%(message)s'
        if record.levelno == logging.WARNING:
            fmt = prefix + ' ' + colored('WRN', 'red', attrs=[]) + ' ' + body
        elif record.levelno in (logging.ERROR, logging.CRITICAL):
            fmt = prefix + ' ' + colored('ERR', 'red', attrs=['underline']) + ' ' + body
        else:
            fmt = prefix + ' ' + body
        if hasattr(self, '_style'):
            # Python3 compatibilty: the active format string lives on _style.
            self._style._fmt = fmt
        self._fmt = fmt
        return super(_MyFormatter, self).format(record)
# Module-level logger shared across the project; propagation is disabled so
# each message is emitted exactly once (not duplicated by the root logger).
_logger = logging.getLogger('joint_embedding')
_logger.propagate = False
_logger.setLevel(logging.INFO)

# set the console output handler
con_handler = logging.StreamHandler(sys.stdout)
con_handler.setFormatter(_MyFormatter(datefmt='%m%d %H:%M:%S'))
_logger.addHandler(con_handler)
class GLOBAL_PATH(object):
    """Mutable holder for a single globally shared filesystem path."""

    def __init__(self, path=None):
        # Fall back to the current working directory when no path is given.
        self.path = os.getcwd() if path is None else path

    def _set_path(self, path):
        self.path = path

    def _get_path(self):
        return self.path
# Singleton path holder; set_file_handler() points it at the active log dir.
PATH = GLOBAL_PATH()


def set_file_handler(path=None, prefix='', time_str=''):
    """Attach a file handler (and tensorboard_logger) under a per-run log dir.

    Returns the path passed to tensorboard_logger's configure().
    """
    # set the file output handler
    if time_str == '':
        file_name = prefix + \
            datetime.datetime.now().strftime("%A_%d_%B_%Y_%I:%M%p") + '.log'
    else:
        file_name = prefix + time_str + '.log'
    if path is None:
        # Default: '<dir of __main__>/../log'.
        mod = sys.modules['__main__']
        path = os.path.join(os.path.abspath(mod.__file__), '..', '..', 'log')
    else:
        path = os.path.join(path, 'log')
    path = os.path.abspath(path)
    # NOTE(review): a *directory* named like the log file is created here, and
    # file_name is joined a second time below, so the actual outputs live
    # inside that directory — confirm this nesting is intentional.
    path = os.path.join(path, file_name)
    if not os.path.exists(path):
        os.makedirs(path)
    PATH._set_path(path)
    path = os.path.join(path, file_name)
    from tensorboard_logger import configure
    configure(path)
    file_handler = logging.FileHandler(
        filename=os.path.join(path, 'logger'), encoding='utf-8', mode='w')
    file_handler.setFormatter(_MyFormatter(datefmt='%m%d %H:%M:%S'))
    _logger.addHandler(file_handler)
    _logger.info('Log file set to {}'.format(path))
    return path
def _get_path():
    """Return the path currently stored in the module-level PATH holder."""
    current = PATH._get_path()
    return current
_LOGGING_METHOD = ['info', 'warning', 'error', 'critical',
                   'warn', 'exception', 'debug']

# export logger functions
# Re-export the bound logger methods as module-level functions so callers can
# write e.g. `logger.info(...)` directly against this module.
for func in _LOGGING_METHOD:
    locals()[func] = getattr(_logger, func)
================================================
FILE: timechamber/utils/motion_lib.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import os
import yaml
from timechamber.tasks.ase_humanoid_base.poselib.poselib.skeleton.skeleton3d import SkeletonMotion
from timechamber.tasks.ase_humanoid_base.poselib.poselib.core.rotation3d import *
from isaacgym.torch_utils import *
from utils import torch_utils
import torch
# When True, motion tensors are mirrored onto the GPU via DeviceCache.
# When False, torch.Tensor.numpy is monkey-patched below so CUDA tensors are
# transparently copied to the CPU first (slower fallback path).
USE_CACHE = True
print("MOVING MOTION DATA TO GPU, USING CACHE:", USE_CACHE)

if not USE_CACHE:
    old_numpy = torch.Tensor.numpy

    class Patch:
        def numpy(self):
            # Route CUDA tensors through a CPU copy before the original numpy().
            if self.is_cuda:
                return self.to("cpu").numpy()
            else:
                return old_numpy(self)

    torch.Tensor.numpy = Patch.numpy
class DeviceCache:
    """Wrap an object and mirror its tensor/ndarray attributes onto a device.

    Floating-point data is cast to float32; other attributes fall through to
    the wrapped object via __getattr__.
    """

    def __init__(self, obj, device):
        self.obj = obj
        self.device = device

        keys = dir(obj)
        num_added = 0
        for k in keys:
            try:
                out = getattr(obj, k)
            except Exception:
                # Some attributes (e.g. properties) may raise on access; skip.
                print("Error for key=", k)
                continue

            if isinstance(out, torch.Tensor):
                if out.is_floating_point():
                    out = out.to(self.device, dtype=torch.float32)
                else:
                    # BUGFIX: Tensor.to() is not in-place — the result must be
                    # re-assigned, otherwise the cached tensor keeps its old device.
                    out = out.to(self.device)
                setattr(self, k, out)
                num_added += 1
            elif isinstance(out, np.ndarray):
                out = torch.tensor(out)
                if out.is_floating_point():
                    out = out.to(self.device, dtype=torch.float32)
                else:
                    out = out.to(self.device)  # see BUGFIX above
                setattr(self, k, out)
                num_added += 1

        print("Total added", num_added)

    def __getattr__(self, string):
        # Invoked only when normal lookup fails: delegate to the wrapped object.
        out = getattr(self.obj, string)
        return out
class MotionLib():
    """Loads reference motion clips and serves interpolated motion states.

    Frames of all clips are concatenated along dim 0 of the flat per-frame
    tensors below; `length_starts` maps a motion id to its first frame.
    """

    def __init__(self, motion_file, dof_body_ids, dof_offsets,
                 key_body_ids, device):
        self._dof_body_ids = dof_body_ids
        self._dof_offsets = dof_offsets
        # Total DOF count is the last cumulative offset.
        self._num_dof = dof_offsets[-1]
        self._key_body_ids = torch.tensor(key_body_ids, device=device)
        self._device = device
        self._load_motions(motion_file)

        motions = self._motions
        # Concatenate per-motion frame data into flat per-frame float tensors.
        self.gts = torch.cat([m.global_translation for m in motions], dim=0).float()
        self.grs = torch.cat([m.global_rotation for m in motions], dim=0).float()
        self.lrs = torch.cat([m.local_rotation for m in motions], dim=0).float()
        self.grvs = torch.cat([m.global_root_velocity for m in motions], dim=0).float()
        self.gravs = torch.cat([m.global_root_angular_velocity for m in motions], dim=0).float()
        self.dvs = torch.cat([m.dof_vels for m in motions], dim=0).float()

        # Exclusive prefix sum of frame counts: start frame of each motion in
        # the concatenated buffers.
        lengths = self._motion_num_frames
        lengths_shifted = lengths.roll(1)
        lengths_shifted[0] = 0
        self.length_starts = lengths_shifted.cumsum(0)

        self.motion_ids = torch.arange(len(self._motions), dtype=torch.long, device=self._device)

        return
def num_motions(self):
return len(self._motions)
def get_total_length(self):
return sum(self._motion_lengths)
def get_motion(self, motion_id):
return self._motions[motion_id]
def sample_motions(self, n):
motion_ids = torch.multinomial(self._motion_weights, num_samples=n, replacement=True)
# m = self.num_motions()
# motion_ids = np.random.choice(m, size=n, replace=True, p=self._motion_weights)
# motion_ids = torch.tensor(motion_ids, device=self._device, dtype=torch.long)
return motion_ids
def sample_time(self, motion_ids, truncate_time=None):
n = len(motion_ids)
phase = torch.rand(motion_ids.shape, device=self._device)
motion_len = self._motion_lengths[motion_ids]
if (truncate_time is not None):
assert(truncate_time >= 0.0)
motion_len -= truncate_time
motion_time = phase * motion_len
return motion_time
def get_motion_length(self, motion_ids):
return self._motion_lengths[motion_ids]
    def get_motion_state(self, motion_ids, motion_times):
        """Interpolated pose/velocity state of each motion at the given times.

        Returns (root_pos, root_rot, dof_pos, root_vel, root_ang_vel,
        dof_vel, key_pos): rotations are slerped between the two frames
        bracketing each time, positions are linearly blended, velocities are
        taken from the earlier frame.
        """
        n = len(motion_ids)
        num_bodies = self._get_num_bodies()
        num_key_bodies = self._key_body_ids.shape[0]

        motion_len = self._motion_lengths[motion_ids]
        num_frames = self._motion_num_frames[motion_ids]
        dt = self._motion_dt[motion_ids]

        # Bracketing frames and interpolation weight for each query time.
        frame_idx0, frame_idx1, blend = self._calc_frame_blend(motion_times, motion_len, num_frames, dt)

        # Offsets into the concatenated per-frame buffers.
        f0l = frame_idx0 + self.length_starts[motion_ids]
        f1l = frame_idx1 + self.length_starts[motion_ids]

        root_pos0 = self.gts[f0l, 0]
        root_pos1 = self.gts[f1l, 0]

        root_rot0 = self.grs[f0l, 0]
        root_rot1 = self.grs[f1l, 0]

        local_rot0 = self.lrs[f0l]
        local_rot1 = self.lrs[f1l]

        # Velocities come from the earlier frame only (no interpolation).
        root_vel = self.grvs[f0l]
        root_ang_vel = self.gravs[f0l]

        key_pos0 = self.gts[f0l.unsqueeze(-1), self._key_body_ids.unsqueeze(0)]
        key_pos1 = self.gts[f1l.unsqueeze(-1), self._key_body_ids.unsqueeze(0)]

        dof_vel = self.dvs[f0l]

        vals = [root_pos0, root_pos1, local_rot0, local_rot1, root_vel, root_ang_vel, key_pos0, key_pos1]
        for v in vals:
            # Guard against accidental float64 tensors leaking into the sim.
            assert v.dtype != torch.float64

        blend = blend.unsqueeze(-1)

        root_pos = (1.0 - blend) * root_pos0 + blend * root_pos1
        root_rot = torch_utils.slerp(root_rot0, root_rot1, blend)

        blend_exp = blend.unsqueeze(-1)
        key_pos = (1.0 - blend_exp) * key_pos0 + blend_exp * key_pos1

        local_rot = torch_utils.slerp(local_rot0, local_rot1, torch.unsqueeze(blend, axis=-1))
        dof_pos = self._local_rotation_to_dof(local_rot)

        return root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos
    def _load_motions(self, motion_file):
        """Load all motion clips (and sampling weights) referenced by
        motion_file, compute their DOF velocities, and move them onto the
        target device. Populates the per-motion bookkeeping tensors."""
        self._motions = []
        self._motion_lengths = []
        self._motion_weights = []
        self._motion_fps = []
        self._motion_dt = []
        self._motion_num_frames = []
        self._motion_files = []

        total_len = 0.0

        motion_files, motion_weights = self._fetch_motion_files(motion_file)
        num_motion_files = len(motion_files)
        for f in range(num_motion_files):
            curr_file = motion_files[f]
            print("Loading {:d}/{:d} motion files: {:s}".format(f + 1, num_motion_files, curr_file))
            curr_motion = SkeletonMotion.from_file(curr_file)

            motion_fps = curr_motion.fps
            curr_dt = 1.0 / motion_fps

            num_frames = curr_motion.tensor.shape[0]
            # Clip duration: (frames - 1) intervals of 1/fps seconds.
            curr_len = 1.0 / motion_fps * (num_frames - 1)

            self._motion_fps.append(motion_fps)
            self._motion_dt.append(curr_dt)
            self._motion_num_frames.append(num_frames)

            # Finite-difference DOF velocities, attached to the motion object.
            curr_dof_vels = self._compute_motion_dof_vels(curr_motion)
            curr_motion.dof_vels = curr_dof_vels

            # Moving motion tensors to the GPU
            if USE_CACHE:
                curr_motion = DeviceCache(curr_motion, self._device)
            else:
                curr_motion.tensor = curr_motion.tensor.to(self._device)
                curr_motion._skeleton_tree._parent_indices = curr_motion._skeleton_tree._parent_indices.to(self._device)
                curr_motion._skeleton_tree._local_translation = curr_motion._skeleton_tree._local_translation.to(self._device)
                curr_motion._rotation = curr_motion._rotation.to(self._device)

            self._motions.append(curr_motion)
            self._motion_lengths.append(curr_len)

            curr_weight = motion_weights[f]
            self._motion_weights.append(curr_weight)
            self._motion_files.append(curr_file)

        self._motion_lengths = torch.tensor(self._motion_lengths, device=self._device, dtype=torch.float32)

        # Normalize sampling weights to a probability distribution.
        self._motion_weights = torch.tensor(self._motion_weights, dtype=torch.float32, device=self._device)
        self._motion_weights /= self._motion_weights.sum()

        self._motion_fps = torch.tensor(self._motion_fps, device=self._device, dtype=torch.float32)
        self._motion_dt = torch.tensor(self._motion_dt, device=self._device, dtype=torch.float32)
        self._motion_num_frames = torch.tensor(self._motion_num_frames, device=self._device)

        num_motions = self.num_motions()
        total_len = self.get_total_length()

        print("Loaded {:d} motions with a total length of {:.3f}s.".format(num_motions, total_len))

        return
def _fetch_motion_files(self, motion_file):
    """Resolve a motion spec into parallel lists of file paths and sampling weights.

    A ``.yaml`` spec lists multiple motions (each entry with a 'file' and a
    non-negative 'weight'); any other extension is treated as a single
    motion file with weight 1.0.
    """
    if os.path.splitext(motion_file)[1] != ".yaml":
        return [motion_file], [1.0]

    spec_dir = os.path.dirname(motion_file)
    with open(os.path.join(os.getcwd(), motion_file), 'r') as f:
        motion_config = yaml.load(f, Loader=yaml.SafeLoader)

    motion_files = []
    motion_weights = []
    for entry in motion_config['motions']:
        weight = entry['weight']
        assert (weight >= 0)
        motion_files.append(os.path.join(spec_dir, entry['file']))
        motion_weights.append(weight)
    return motion_files, motion_weights
def _calc_frame_blend(self, time, len, num_frames, dt):
    """Map sample times to bounding frame indices plus an interpolation blend.

    Returns (idx_lo, idx_hi, blend): the frames bracketing each time and the
    fractional offset of the time past idx_lo, in units of dt.
    """
    # Normalized phase of each sample along the clip, clamped to [0, 1].
    phase = torch.clip(time / len, 0.0, 1.0)
    idx_lo = (phase * (num_frames - 1)).long()
    idx_hi = torch.min(idx_lo + 1, num_frames - 1)
    blend = (time - idx_lo * dt) / dt
    return idx_lo, idx_hi, blend
def _get_num_bodies(self):
    """Number of joints in the skeleton, read from the first loaded motion."""
    return self.get_motion(0).num_joints
def _compute_motion_dof_vels(self, motion):
    """Finite-difference DOF velocities for every frame of `motion`.

    The last frame has no successor, so its velocity duplicates the
    second-to-last one, keeping the output length equal to the frame count.
    """
    dt = 1.0 / motion.fps
    frame_count = motion.tensor.shape[0]

    dof_vels = [
        self._local_rotation_to_dof_vel(motion.local_rotation[i],
                                        motion.local_rotation[i + 1], dt)
        for i in range(frame_count - 1)
    ]
    dof_vels.append(dof_vels[-1])
    return torch.stack(dof_vels, dim=0)
def _local_rotation_to_dof(self, local_rot):
    """Convert per-body local rotations to a flat DOF position vector.

    Args:
        local_rot: per-body rotation quaternions, indexed as
            local_rot[:, body_id] (xyzw layout per the torch_utils helpers).

    Returns:
        Tensor of shape (N, num_dof): exponential-map coordinates for
        3-DOF (spherical) joints and a normalized hinge angle for 1-DOF
        (revolute) joints.

    Raises:
        ValueError: if a joint has a DOF count other than 1 or 3.
    """
    body_ids = self._dof_body_ids
    dof_offsets = self._dof_offsets

    n = local_rot.shape[0]
    dof_pos = torch.zeros((n, self._num_dof), dtype=torch.float, device=self._device)

    for j in range(len(body_ids)):
        body_id = body_ids[j]
        joint_offset = dof_offsets[j]
        joint_size = dof_offsets[j + 1] - joint_offset

        if joint_size == 3:
            # Spherical joint: store the full rotation as an exponential map.
            joint_q = local_rot[:, body_id]
            joint_exp_map = torch_utils.quat_to_exp_map(joint_q)
            dof_pos[:, joint_offset:(joint_offset + joint_size)] = joint_exp_map
        elif joint_size == 1:
            # Revolute joint: project the rotation onto its hinge axis.
            joint_q = local_rot[:, body_id]
            joint_theta, joint_axis = torch_utils.quat_to_angle_axis(joint_q)
            joint_theta = joint_theta * joint_axis[..., 1]  # assume joint is always along y axis
            dof_pos[:, joint_offset] = normalize_angle(joint_theta)
        else:
            # Raise a real exception instead of print + assert(False):
            # asserts are stripped under `python -O`.
            raise ValueError("Unsupported joint type with {:d} DOFs".format(joint_size))

    return dof_pos
def _local_rotation_to_dof_vel(self, local_rot0, local_rot1, dt):
    """Angular DOF velocity between two consecutive local-rotation frames.

    Args:
        local_rot0: per-body local rotations at frame t.
        local_rot1: per-body local rotations at frame t + 1.
        dt: time step between the two frames (seconds).

    Returns:
        Tensor of shape (num_dof,) with per-DOF angular velocities.

    Raises:
        ValueError: if a joint has a DOF count other than 1 or 3.
    """
    body_ids = self._dof_body_ids
    dof_offsets = self._dof_offsets

    dof_vel = torch.zeros([self._num_dof], device=self._device)

    # Relative rotation between the two frames, as angle-axis, scaled by 1/dt
    # to obtain a per-body angular velocity.
    diff_quat_data = quat_mul_norm(quat_inverse(local_rot0), local_rot1)
    diff_angle, diff_axis = quat_angle_axis(diff_quat_data)
    local_vel = diff_axis * diff_angle.unsqueeze(-1) / dt

    for j in range(len(body_ids)):
        body_id = body_ids[j]
        joint_offset = dof_offsets[j]
        joint_size = dof_offsets[j + 1] - joint_offset

        if joint_size == 3:
            dof_vel[joint_offset:(joint_offset + joint_size)] = local_vel[body_id]
        elif joint_size == 1:
            dof_vel[joint_offset] = local_vel[body_id][1]  # assume joint is always along y axis
        else:
            # Raise instead of print + assert(False); asserts vanish under -O.
            raise ValueError("Unsupported joint type with {:d} DOFs".format(joint_size))

    return dof_vel
================================================
FILE: timechamber/utils/reformat.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from omegaconf import DictConfig, OmegaConf
from typing import Dict
def omegaconf_to_dict(d: DictConfig) -> Dict:
    """Converts an omegaconf DictConfig to a python Dict, respecting variable interpolation."""
    # Recurse into nested DictConfig nodes; copy leaf values as-is.
    return {
        key: omegaconf_to_dict(value) if isinstance(value, DictConfig) else value
        for key, value in d.items()
    }
def print_dict(val, nesting: int = -4, start: bool = True):
    """Outputs a nested dictionary, indenting four spaces per nesting level.

    Args:
        val: value to print; dicts are recursed into, anything else is
            printed directly.
        nesting: current indentation in spaces (starts at -4 so the first
            level prints at column 0).
        start: True only for the outermost call, which suppresses the
            leading newline printed before nested dicts.
    """
    # isinstance (instead of `type(val) == dict`) also recurses into dict
    # subclasses such as OrderedDict.
    if isinstance(val, dict):
        if not start:
            print('')
        nesting += 4
        for k in val:
            print(nesting * ' ', end='')
            print(k, end=': ')
            print_dict(val[k], nesting, start=False)
    else:
        print(val)
# EOF
================================================
FILE: timechamber/utils/rlgames_utils.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from rl_games.common import env_configurations, vecenv
from rl_games.common.algo_observer import AlgoObserver
from rl_games.algos_torch import torch_ext
from timechamber.utils.utils import set_seed
import torch
import numpy as np
from typing import Callable
from isaacgym import gymapi
from isaacgym import gymutil
from omegaconf import DictConfig
from timechamber.tasks import isaacgym_task_map
from timechamber.utils.vec_task_wrappers import VecTaskPythonWrapper
from timechamber.utils.config import parse_sim_params
SIM_TIMESTEP = 1.0 / 60.0
def get_rlgames_env_creator(
        # used to create the vec task
        seed: int,
        cfg: DictConfig,
        task_config: dict,
        task_name: str,
        sim_device: str,
        rl_device: str,
        graphics_device_id: int,
        headless: bool,
        device_type: str = "cuda",
        # Used to handle multi-gpu case
        multi_gpu: bool = False,
        post_create_hook: Callable = None,
        virtual_screen_capture: bool = False,
        force_render: bool = False,
):
    """Parses the configuration parameters for the environment task and creates a VecTask

    Args:
        task_config: environment configuration.
        task_name: Name of the task, used to evaluate based on the imported name (eg 'Trifinger')
        sim_device: The type of env device, eg 'cuda:0'
        rl_device: Device that RL will be done on, eg 'cuda:0'
        graphics_device_id: Graphics device ID.
        headless: Whether to run in headless mode.
        multi_gpu: Whether to use multi gpu
        post_create_hook: Hooks to be called after environment creation.
            [Needed to setup WandB only for one of the RL Games instances when doing multiple GPUs]
        virtual_screen_capture: Set to True to allow the users get captured screen in RGB array via `env.render(mode='rgb_array')`.
        force_render: Set to True to always force rendering in the steps (if the `control_freq_inv` is greater than 1 we suggest stting this arg to True)
    Returns:
        A factory callable that builds and wraps the env (a VecTaskPythonWrapper).
    """
    def create_rlgpu_env():
        """
        Creates the task from configurations and wraps it using RL-games wrappers if required.
        """
        # create native task and pass custom config
        if task_name == "MA_Humanoid_Strike":
            # ASE-style constructor: needs explicit sim params and a physics
            # engine enum, and the wrapper is flagged as an AMP env.
            sim_params = parse_sim_params(cfg, task_config)
            if cfg.physics_engine == "physx":
                physics_engine = gymapi.SIM_PHYSX
            elif cfg.physics_engine == "flex":
                physics_engine = gymapi.SIM_FLEX
            # NOTE(review): `physics_engine` is left unbound when
            # cfg.physics_engine is neither "physx" nor "flex" — presumably
            # upstream config validation prevents that; confirm.
            task = isaacgym_task_map[task_name](
                cfg=task_config,
                sim_params=sim_params,
                physics_engine=physics_engine,
                device_type=device_type,
                device_id=graphics_device_id,
                headless=headless
            )
            env = VecTaskPythonWrapper(task, rl_device,
                                       task_config.get("clip_observations", np.inf),
                                       task_config.get("clip_actions", 1.0),
                                       AMP=True)
        else:
            # Standard IsaacGymEnvs-style constructor.
            task = isaacgym_task_map[task_name](
                cfg=task_config,
                rl_device=rl_device,
                sim_device=sim_device,
                graphics_device_id=graphics_device_id,
                headless=headless,
                virtual_screen_capture=virtual_screen_capture,
                force_render=force_render,
            )
            env = VecTaskPythonWrapper(task, rl_device, task_config.get("clip_observations", np.inf), task_config.get("clip_actions", 1.0))

        if post_create_hook is not None:
            post_create_hook()

        return env
    return create_rlgpu_env
class RLGPUAlgoObserver(AlgoObserver):
    """Allows us to log stats from the env along with the algorithm running stats. """

    def __init__(self):
        pass

    def after_init(self, algo):
        # Cache handles from the rl-games algorithm: score meter, episode-info
        # buffer, and the TensorBoard writer used in after_print_stats.
        self.algo = algo
        self.mean_scores = torch_ext.AverageMeter(1, self.algo.games_to_track).to(self.algo.ppo_device)
        self.ep_infos = []
        self.direct_info = {}
        self.writer = self.algo.writer

    def process_infos(self, infos, done_indices):
        """Accumulate env infos: 'episode' dicts are buffered, and any scalar
        entries are stored for direct logging on the next stats print."""
        assert isinstance(infos, dict), "RLGPUAlgoObserver expects dict info"
        if isinstance(infos, dict):
            if 'episode' in infos:
                self.ep_infos.append(infos['episode'])
            if len(infos) > 0 and isinstance(infos, dict):  # allow direct logging from env
                self.direct_info = {}
                for k, v in infos.items():
                    # only log scalars
                    if isinstance(v, float) or isinstance(v, int) or (isinstance(v, torch.Tensor) and len(v.shape) == 0):
                        self.direct_info[k] = v

    def after_clear_stats(self):
        # Reset the running score meter between evaluation windows.
        self.mean_scores.clear()

    def after_print_stats(self, frame, epoch_num, total_time):
        """Flush accumulated episode infos, direct scalars, and mean scores
        to TensorBoard, keyed by frame / epoch / wall time."""
        if self.ep_infos:
            for key in self.ep_infos[0]:
                infotensor = torch.tensor([], device=self.algo.device)
                for ep_info in self.ep_infos:
                    # handle scalar and zero dimensional tensor infos
                    if not isinstance(ep_info[key], torch.Tensor):
                        ep_info[key] = torch.Tensor([ep_info[key]])
                    if len(ep_info[key].shape) == 0:
                        ep_info[key] = ep_info[key].unsqueeze(0)
                    infotensor = torch.cat((infotensor, ep_info[key].to(self.algo.device)))
                value = torch.mean(infotensor)
                self.writer.add_scalar('Episode/' + key, value, epoch_num)
            self.ep_infos.clear()

        for k, v in self.direct_info.items():
            self.writer.add_scalar(f'{k}/frame', v, frame)
            self.writer.add_scalar(f'{k}/iter', v, epoch_num)
            self.writer.add_scalar(f'{k}/time', v, total_time)

        if self.mean_scores.current_size > 0:
            mean_scores = self.mean_scores.get_mean()
            self.writer.add_scalar('scores/mean', mean_scores, frame)
            self.writer.add_scalar('scores/iter', mean_scores, epoch_num)
            self.writer.add_scalar('scores/time', mean_scores, total_time)
class RLGPUEnv(vecenv.IVecEnv):
    """rl-games vec-env adapter: instantiates the registered env creator and
    packages observations (plus optional global states) into the dict layout
    rl-games expects."""

    def __init__(self, config_name, num_actors, **kwargs):
        self.env = env_configurations.configurations[config_name]['env_creator'](**kwargs)
        # Global states are only exposed when the underlying env defines them.
        self.use_global_obs = (self.env.num_states > 0)
        self.full_state = {}
        self.full_state["obs"] = self.reset()
        if self.use_global_obs:
            self.full_state["states"] = self.env.get_state()

    def step(self, action):
        next_obs, reward, is_done, info = self.env.step(action)
        # todo: improve, return only dictinary
        self.full_state["obs"] = next_obs
        if self.use_global_obs:
            self.full_state["states"] = self.env.get_state()
        return self.full_state, reward, is_done, info

    def reset(self, env_ids=None):
        self.full_state["obs"] = self.env.reset(env_ids)
        if self.use_global_obs:
            self.full_state["states"] = self.env.get_state()
        return self.full_state

    def get_number_of_agents(self):
        return self.env.get_number_of_agents()

    def get_env_info(self):
        # Expose the spaces rl-games needs; AMP obs space is always present.
        info = {
            'action_space': self.env.action_space,
            'observation_space': self.env.observation_space,
            'amp_observation_space': self.env.amp_observation_space,
        }
        if self.use_global_obs:
            info['state_space'] = self.env.state_space
            print(info['action_space'], info['observation_space'], info['state_space'])
        else:
            print(info['action_space'], info['observation_space'])
        return info
================================================
FILE: timechamber/utils/torch_jit_utils.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch
import numpy as np
from isaacgym.torch_utils import *
@torch.jit.script
def compute_heading_and_up(
    torso_rotation, inv_start_rot, to_target, vec0, vec1, up_idx
):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, int) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]
    # Torso orientation relative to its start rotation, plus up/heading basis
    # vectors and their scalar projections.
    #   torso_rotation : current torso quaternion, one row per env
    #   inv_start_rot  : inverse of the initial torso rotation
    #   to_target      : vector toward the target; only its direction is used
    #   vec0 / vec1    : reference vectors rotated into the torso frame to get
    #                    heading_vec / up_vec respectively
    #   up_idx         : component of up_vec reported as the uprightness scalar
    num_envs = torso_rotation.shape[0]
    target_dirs = normalize(to_target)

    torso_quat = quat_mul(torso_rotation, inv_start_rot)
    up_vec = get_basis_vector(torso_quat, vec1).view(num_envs, 3)
    heading_vec = get_basis_vector(torso_quat, vec0).view(num_envs, 3)
    up_proj = up_vec[:, up_idx]
    # Batched dot product: alignment of the heading with the target direction.
    heading_proj = torch.bmm(heading_vec.view(
        num_envs, 1, 3), target_dirs.view(num_envs, 3, 1)).view(num_envs)

    return torso_quat, up_proj, heading_proj, up_vec, heading_vec
@torch.jit.script
def compute_rot(torso_quat, velocity, ang_velocity, targets, torso_positions):
    # Express world-frame velocities in the torso's local frame and derive
    # Euler angles plus the heading error toward the target.
    vel_loc = quat_rotate_inverse(torso_quat, velocity)
    angvel_loc = quat_rotate_inverse(torso_quat, ang_velocity)

    roll, pitch, yaw = get_euler_xyz(torso_quat)

    # NOTE(review): the target bearing uses the z and x components,
    # i.e. atan2(dz, dx) — confirm this matches the task's ground-plane axes.
    walk_target_angle = torch.atan2(targets[:, 2] - torso_positions[:, 2],
                                    targets[:, 0] - torso_positions[:, 0])
    angle_to_target = walk_target_angle - yaw

    return vel_loc, angvel_loc, roll, pitch, yaw, angle_to_target
@torch.jit.script
def quat_axis(q, axis=0):
    # type: (Tensor, int) -> Tensor
    # Rotate the unit basis vector `axis` (0=x, 1=y, 2=z) by each quaternion.
    unit_vec = torch.zeros(q.shape[0], 3, device=q.device)
    unit_vec[:, axis] = 1
    return quat_rotate(q, unit_vec)
"""
Normalization and Denormalization of Tensors
"""
@torch.jit.script
def scale_transform(x: torch.Tensor, lower: torch.Tensor, upper: torch.Tensor) -> torch.Tensor:
    """
    Normalizes a given input tensor to a range of [-1, 1].

    @note It uses pytorch broadcasting functionality to deal with batched input.

    Args:
        x: Input tensor of shape (N, dims).
        lower: The minimum value of the tensor. Shape (dims,)
        upper: The maximum value of the tensor. Shape (dims,)

    Returns:
        Normalized transform of the tensor. Shape (N, dims)
    """
    # Shift by the midpoint of the range, then scale by half the span.
    midpoint = (lower + upper) * 0.5
    span = upper - lower
    return 2 * (x - midpoint) / span
@torch.jit.script
def unscale_transform(x: torch.Tensor, lower: torch.Tensor, upper: torch.Tensor) -> torch.Tensor:
    """
    Denormalizes a given input tensor from range of [-1, 1] to (lower, upper).

    @note It uses pytorch broadcasting functionality to deal with batched input.

    Args:
        x: Input tensor of shape (N, dims).
        lower: The minimum value of the tensor. Shape (dims,)
        upper: The maximum value of the tensor. Shape (dims,)

    Returns:
        Denormalized transform of the tensor. Shape (N, dims)
    """
    # Scale by half the span, then shift to the midpoint of the range.
    midpoint = (lower + upper) * 0.5
    half_span = (upper - lower) * 0.5
    return x * half_span + midpoint
@torch.jit.script
def saturate(x: torch.Tensor, lower: torch.Tensor, upper: torch.Tensor) -> torch.Tensor:
    """
    Clamps a given input tensor to (lower, upper).

    @note It uses pytorch broadcasting functionality to deal with batched input.

    Args:
        x: Input tensor of shape (N, dims).
        lower: The minimum value of the tensor. Shape (dims,)
        upper: The maximum value of the tensor. Shape (dims,)

    Returns:
        Clamped transform of the tensor. Shape (N, dims)
    """
    # Cap from above first, then floor from below.
    capped = torch.min(x, upper)
    return torch.max(capped, lower)
"""
Rotation conversions
"""
@torch.jit.script
def quat_diff_rad(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    """
    Get the difference in radians between two quaternions.

    Args:
        a: first quaternion, shape (N, 4)
        b: second quaternion, shape (N, 4)
    Returns:
        Difference in radians, shape (N,)
    """
    b_conj = quat_conjugate(b)
    # Relative rotation a * conj(b) (conjugate == inverse for unit quaternions).
    mul = quat_mul(a, b_conj)
    # 2 * torch.acos(torch.abs(mul[:, -1]))
    # asin of the vector-part norm is better conditioned near identity than
    # acos of the scalar part; the clamp guards against |v| > 1 from rounding.
    return 2.0 * torch.asin(
        torch.clamp(
            torch.norm(
                mul[:, 0:3],
                p=2, dim=-1), max=1.0)
    )
@torch.jit.script
def local_to_world_space(pos_offset_local: torch.Tensor, pose_global: torch.Tensor):
    """ Convert a point from the local frame to the global frame
    Args:
        pos_offset_local: Point in local frame. Shape: [N, 3]
        pose_global: The spatial pose of this point. Shape: [N, 7]
    Returns:
        Position in the global frame. Shape: [N, 3]
    """
    # Promote the offset to a pure quaternion (w = 0) so it can be rotated
    # via q * p * conj(q).
    zeros_w = torch.zeros(pos_offset_local.shape[0], 1, dtype=torch.float32, device=pos_offset_local.device)
    pure_quat = torch.cat([pos_offset_local, zeros_w], dim=-1)

    rot = pose_global[:, 3:7]
    rotated_offset = quat_mul(rot, quat_mul(pure_quat, quat_conjugate(rot)))[:, 0:3]
    # Translate by the position component of the global pose.
    return rotated_offset + pose_global[:, 0:3]
# NB: do not make this function jit, since it is passed around as an argument.
def normalise_quat_in_pose(pose):
    """Takes a pose and normalises the quaternion portion of it.

    Args:
        pose: shape N, 7
    Returns:
        Pose with normalised quat. Shape N, 7
    """
    pos = pose[:, 0:3]
    quat = pose[:, 3:7]
    # Out-of-place division: the previous `quat /= ...` wrote through the
    # slice view and silently mutated the caller's `pose` tensor.
    quat = quat / torch.norm(quat, dim=-1, p=2).reshape(-1, 1)
    return torch.cat([pos, quat], dim=-1)
@torch.jit.script
def my_quat_rotate(q, v):
    # Rotate vectors v by quaternions q (xyzw layout) without building
    # rotation matrices:
    #   v' = v (2w^2 - 1) + 2w (q_vec x v) + 2 q_vec (q_vec . v)
    n = q.shape[0]
    w = q[:, -1]
    xyz = q[:, :3]
    term_a = v * (2.0 * w ** 2 - 1.0).unsqueeze(-1)
    term_b = torch.cross(xyz, v, dim=-1) * w.unsqueeze(-1) * 2.0
    dot = torch.bmm(xyz.view(n, 1, 3), v.view(n, 3, 1)).squeeze(-1)
    term_c = xyz * dot * 2.0
    return term_a + term_b + term_c
@torch.jit.script
def quat_to_angle_axis(q):
    # type: (Tensor) -> Tuple[Tensor, Tensor]
    # computes axis-angle representation from quaternion q
    # q must be normalized
    min_theta = 1e-5
    # Component indices: quaternions are laid out as (x, y, z, w).
    qx, qy, qz, qw = 0, 1, 2, 3

    sin_theta = torch.sqrt(1 - q[..., qw] * q[..., qw])
    angle = 2 * torch.acos(q[..., qw])
    angle = normalize_angle(angle)
    sin_theta_expand = sin_theta.unsqueeze(-1)
    axis = q[..., qx:qw] / sin_theta_expand

    # Near-zero rotations have an ill-defined axis; fall back to angle 0
    # about the z axis.
    mask = sin_theta > min_theta
    default_axis = torch.zeros_like(axis)
    default_axis[..., -1] = 1

    angle = torch.where(mask, angle, torch.zeros_like(angle))
    mask_expand = mask.unsqueeze(-1)
    axis = torch.where(mask_expand, axis, default_axis)
    return angle, axis
@torch.jit.script
def angle_axis_to_exp_map(angle, axis):
    # type: (Tensor, Tensor) -> Tensor
    # Exponential-map coordinates: the rotation axis scaled by the angle.
    return angle.unsqueeze(-1) * axis
@torch.jit.script
def quat_to_exp_map(q):
    # type: (Tensor) -> Tensor
    # Exponential map of a normalized quaternion, via its angle-axis form.
    rot_angle, rot_axis = quat_to_angle_axis(q)
    return angle_axis_to_exp_map(rot_angle, rot_axis)
@torch.jit.script
def quat_to_tan_norm(q):
    # type: (Tensor) -> Tensor
    # 6D rotation representation: the rotated x axis (tangent) concatenated
    # with the rotated z axis (normal).
    tan_ref = torch.zeros_like(q[..., 0:3])
    tan_ref[..., 0] = 1
    norm_ref = torch.zeros_like(q[..., 0:3])
    norm_ref[..., -1] = 1

    tan = my_quat_rotate(q, tan_ref)
    norm = my_quat_rotate(q, norm_ref)
    return torch.cat([tan, norm], dim=len(tan.shape) - 1)
@torch.jit.script
def euler_xyz_to_exp_map(roll, pitch, yaw):
    # type: (Tensor, Tensor, Tensor) -> Tensor
    # Convert XYZ Euler angles to exponential-map coordinates, via an
    # intermediate quaternion.
    q = quat_from_euler_xyz(roll, pitch, yaw)
    exp_map = quat_to_exp_map(q)
    return exp_map
@torch.jit.script
def exp_map_to_angle_axis(exp_map):
    # Decompose an exponential map into (angle, unit axis); degenerate
    # near-zero rotations map to angle 0 about the z axis.
    min_theta = 1e-5

    angle = torch.norm(exp_map, dim=-1)
    axis = exp_map / torch.unsqueeze(angle, dim=-1)
    angle = normalize_angle(angle)

    fallback_axis = torch.zeros_like(exp_map)
    fallback_axis[..., -1] = 1

    valid = angle > min_theta
    angle = torch.where(valid, angle, torch.zeros_like(angle))
    axis = torch.where(valid.unsqueeze(-1), axis, fallback_axis)
    return angle, axis
@torch.jit.script
def exp_map_to_quat(exp_map):
    # Quaternion for an exponential-map rotation, via angle-axis.
    rot_angle, rot_axis = exp_map_to_angle_axis(exp_map)
    return quat_from_angle_axis(rot_angle, rot_axis)
@torch.jit.script
def slerp(q0, q1, t):
    # type: (Tensor, Tensor, Tensor) -> Tensor
    # Spherical linear interpolation between unit quaternions (xyzw layout).
    qx, qy, qz, qw = 0, 1, 2, 3

    # Per-element dot product = cosine of half the angle between rotations.
    dot = (q0[..., qw] * q1[..., qw] + q0[..., qx] * q1[..., qx]
           + q0[..., qy] * q1[..., qy] + q0[..., qz] * q1[..., qz])

    # Negate q1 where the dot is negative so we follow the shorter arc.
    flip = dot < 0
    q1 = q1.clone()
    q1[flip] = -q1[flip]
    cos_half_theta = torch.unsqueeze(torch.abs(dot), dim=-1)

    half_theta = torch.acos(cos_half_theta)
    sin_half_theta = torch.sqrt(1.0 - cos_half_theta * cos_half_theta)
    ratio_a = torch.sin((1 - t) * half_theta) / sin_half_theta
    ratio_b = torch.sin(t * half_theta) / sin_half_theta

    comp_x = ratio_a * q0[..., qx:qx + 1] + ratio_b * q1[..., qx:qx + 1]
    comp_y = ratio_a * q0[..., qy:qy + 1] + ratio_b * q1[..., qy:qy + 1]
    comp_z = ratio_a * q0[..., qz:qz + 1] + ratio_b * q1[..., qz:qz + 1]
    comp_w = ratio_a * q0[..., qw:qw + 1] + ratio_b * q1[..., qw:qw + 1]
    blended = torch.cat([comp_x, comp_y, comp_z, comp_w], dim=len(comp_w.shape) - 1)

    # Nearly parallel quaternions: plain average avoids 0/0.
    blended = torch.where(torch.abs(sin_half_theta) < 0.001, 0.5 * q0 + 0.5 * q1, blended)
    # Identical rotations (|cos| >= 1): return q0 exactly.
    blended = torch.where(torch.abs(cos_half_theta) >= 1, q0, blended)
    return blended
@torch.jit.script
def calc_heading(q):
    # type: (Tensor) -> Tensor
    # Heading (direction on the xy plane) of a normalized quaternion: rotate
    # the x axis and measure its direction via atan2.
    x_axis = torch.zeros_like(q[..., 0:3])
    x_axis[..., 0] = 1
    facing = my_quat_rotate(q, x_axis)
    return torch.atan2(facing[..., 1], facing[..., 0])
@torch.jit.script
def calc_heading_quat(q):
    # type: (Tensor) -> Tensor
    # calculate heading rotation from quaternion
    # the heading is the direction on the xy plane
    # q must be normalized
    heading = calc_heading(q)
    # Rebuild a quaternion containing only the heading, as a rotation about z.
    axis = torch.zeros_like(q[..., 0:3])
    axis[..., 2] = 1

    heading_q = quat_from_angle_axis(heading, axis)
    return heading_q
@torch.jit.script
def calc_heading_quat_inv(q):
    # type: (Tensor) -> Tensor
    # calculate heading rotation from quaternion
    # the heading is the direction on the xy plane
    # q must be normalized
    heading = calc_heading(q)
    # Negated heading about z: applying this quaternion removes the heading.
    axis = torch.zeros_like(q[..., 0:3])
    axis[..., 2] = 1

    heading_q = quat_from_angle_axis(-heading, axis)
    return heading_q
# EOF
================================================
FILE: timechamber/utils/torch_utils.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch
import numpy as np
from isaacgym.torch_utils import *
@torch.jit.script
def quat_to_angle_axis(q):
    # type: (Tensor) -> Tuple[Tensor, Tensor]
    # computes axis-angle representation from quaternion q
    # q must be normalized
    min_theta = 1e-5
    # Component indices: quaternions are laid out as (x, y, z, w).
    qx, qy, qz, qw = 0, 1, 2, 3

    sin_theta = torch.sqrt(1 - q[..., qw] * q[..., qw])
    angle = 2 * torch.acos(q[..., qw])
    angle = normalize_angle(angle)
    sin_theta_expand = sin_theta.unsqueeze(-1)
    axis = q[..., qx:qw] / sin_theta_expand

    # Near-zero rotations have an ill-defined axis; fall back to angle 0
    # about the z axis.
    mask = torch.abs(sin_theta) > min_theta
    default_axis = torch.zeros_like(axis)
    default_axis[..., -1] = 1

    angle = torch.where(mask, angle, torch.zeros_like(angle))
    mask_expand = mask.unsqueeze(-1)
    axis = torch.where(mask_expand, axis, default_axis)
    return angle, axis
@torch.jit.script
def angle_axis_to_exp_map(angle, axis):
    # type: (Tensor, Tensor) -> Tensor
    # Exponential map = rotation axis scaled by the rotation angle.
    return angle.unsqueeze(-1) * axis
@torch.jit.script
def quat_to_exp_map(q):
    # type: (Tensor) -> Tensor
    # Exponential map of a normalized quaternion, via its angle-axis form.
    rot_angle, rot_axis = quat_to_angle_axis(q)
    return angle_axis_to_exp_map(rot_angle, rot_axis)
@torch.jit.script
def quat_to_tan_norm(q):
    # type: (Tensor) -> Tensor
    # represents a rotation using the tangent and normal vectors
    # Rotated x axis (tangent) ...
    ref_tan = torch.zeros_like(q[..., 0:3])
    ref_tan[..., 0] = 1
    tan = quat_rotate(q, ref_tan)

    # ... concatenated with the rotated z axis (normal) -> 6D representation.
    ref_norm = torch.zeros_like(q[..., 0:3])
    ref_norm[..., -1] = 1
    norm = quat_rotate(q, ref_norm)

    norm_tan = torch.cat([tan, norm], dim=len(tan.shape) - 1)
    return norm_tan
@torch.jit.script
def euler_xyz_to_exp_map(roll, pitch, yaw):
    # type: (Tensor, Tensor, Tensor) -> Tensor
    # XYZ Euler angles -> exponential map, via an intermediate quaternion.
    q = quat_from_euler_xyz(roll, pitch, yaw)
    exp_map = quat_to_exp_map(q)
    return exp_map
@torch.jit.script
def exp_map_to_angle_axis(exp_map):
    # Decompose an exponential map into (angle, unit axis).
    min_theta = 1e-5

    # The angle is the vector's norm; the axis is the normalized vector.
    angle = torch.norm(exp_map, dim=-1)
    angle_exp = torch.unsqueeze(angle, dim=-1)
    axis = exp_map / angle_exp
    angle = normalize_angle(angle)

    # Near-zero rotations have an ill-defined axis; fall back to angle 0
    # about the z axis.
    default_axis = torch.zeros_like(exp_map)
    default_axis[..., -1] = 1

    mask = torch.abs(angle) > min_theta
    angle = torch.where(mask, angle, torch.zeros_like(angle))
    mask_expand = mask.unsqueeze(-1)
    axis = torch.where(mask_expand, axis, default_axis)

    return angle, axis
@torch.jit.script
def exp_map_to_quat(exp_map):
    # Quaternion for an exponential-map rotation, via angle-axis.
    rot_angle, rot_axis = exp_map_to_angle_axis(exp_map)
    return quat_from_angle_axis(rot_angle, rot_axis)
@torch.jit.script
def slerp(q0, q1, t):
    # type: (Tensor, Tensor, Tensor) -> Tensor
    # Spherical linear interpolation between unit quaternions.
    dot = torch.sum(q0 * q1, dim=-1)

    # Negate q1 where the dot is negative so we follow the shorter arc.
    flip = dot < 0
    q1 = q1.clone()
    q1[flip] = -q1[flip]
    cos_half_theta = torch.unsqueeze(torch.abs(dot), dim=-1)

    half_theta = torch.acos(cos_half_theta)
    sin_half_theta = torch.sqrt(1.0 - cos_half_theta * cos_half_theta)
    ratio_a = torch.sin((1 - t) * half_theta) / sin_half_theta
    ratio_b = torch.sin(t * half_theta) / sin_half_theta

    blended = ratio_a * q0 + ratio_b * q1
    # Nearly parallel quaternions: plain average avoids 0/0.
    blended = torch.where(torch.abs(sin_half_theta) < 0.001, 0.5 * q0 + 0.5 * q1, blended)
    # Identical rotations (|cos| >= 1): return q0 exactly.
    blended = torch.where(torch.abs(cos_half_theta) >= 1, q0, blended)
    return blended
@torch.jit.script
def calc_heading(q):
    # type: (Tensor) -> Tensor
    # calculate heading direction from quaternion
    # the heading is the direction on the xy plane
    # q must be normalized
    # Rotate the x axis by q and measure the result's direction via atan2.
    ref_dir = torch.zeros_like(q[..., 0:3])
    ref_dir[..., 0] = 1
    rot_dir = quat_rotate(q, ref_dir)

    heading = torch.atan2(rot_dir[..., 1], rot_dir[..., 0])
    return heading
@torch.jit.script
def calc_heading_quat(q):
    # type: (Tensor) -> Tensor
    # Quaternion containing only the heading of q, i.e. a pure rotation
    # about the z axis by the xy-plane heading angle.
    yaw = calc_heading(q)
    z_axis = torch.zeros_like(q[..., 0:3])
    z_axis[..., 2] = 1
    return quat_from_angle_axis(yaw, z_axis)
@torch.jit.script
def calc_heading_quat_inv(q):
    # type: (Tensor) -> Tensor
    # Inverse heading quaternion: rotation about z by the negated xy-plane
    # heading angle; applying it removes the heading from q.
    yaw = calc_heading(q)
    z_axis = torch.zeros_like(q[..., 0:3])
    z_axis[..., 2] = 1
    return quat_from_angle_axis(-yaw, z_axis)
================================================
FILE: timechamber/utils/utils.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# python
import numpy as np
import torch
import random
import os
from collections import OrderedDict
import time
from isaacgym import gymapi
from isaacgym import gymutil
def set_np_formatting():
    """Configure numpy printing for wide, compact debug output."""
    np.set_printoptions(
        edgeitems=30,
        infstr='inf',
        linewidth=4000,
        nanstr='nan',
        precision=2,
        suppress=False,
        threshold=10000,
        formatter=None,
    )
def set_seed(seed, torch_deterministic=False, rank=0):
    """Seed python, numpy and torch RNGs and return the seed actually used.

    A seed of -1 means "pick one": a fixed 42 + rank when determinism is
    requested, otherwise a fresh random draw. Any explicit seed is offset
    by rank so each worker gets a distinct stream.
    """
    if seed == -1:
        seed = 42 + rank if torch_deterministic else np.random.randint(0, 10000)
    else:
        seed = seed + rank
    print(f"Setting seed: {seed}")

    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    if torch_deterministic:
        # refer to https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
        os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        torch.use_deterministic_algorithms(True)
    else:
        torch.backends.cudnn.benchmark = True
        torch.backends.cudnn.deterministic = False

    return seed
def load_check(checkpoint, normalize_input: bool, normalize_value: bool):
    """Back-fill normalization statistics into ``checkpoint['model']``.

    Older checkpoints store reward/observation running stats outside the
    model state dict; when the requested normalizers are missing from the
    model, copy them in so the state dict loads cleanly.
    """
    merged = OrderedDict()
    model_state = checkpoint['model']
    stat_fields = ('running_mean', 'running_var', 'count')

    if normalize_value and 'value_mean_std.running_mean' not in model_state.keys():
        reward_stats = checkpoint['reward_mean_std']
        for field in stat_fields:
            merged['value_mean_std.' + field] = reward_stats[field]

    if normalize_input and 'running_mean_std.running_mean' not in model_state.keys():
        obs_stats = checkpoint['running_mean_std']
        for field in stat_fields:
            merged['running_mean_std.' + field] = obs_stats[field]

    merged.update(model_state)
    checkpoint['model'] = merged
    return checkpoint
def safe_filesystem_op(func, *args, **kwargs):
    """
    This is to prevent spurious crashes related to saving checkpoints or restoring from checkpoints in a Network
    Filesystem environment (i.e. NGC cloud or SLURM)

    Retries *func* with exponential backoff (1s, 2s, 4s, ...) and raises
    RuntimeError once all attempts are exhausted.
    """
    num_attempts = 5
    attempt = 0
    while attempt < num_attempts:
        try:
            return func(*args, **kwargs)
        except Exception as exc:
            print(f'Exception {exc} when trying to execute {func} with args:{args} and kwargs:{kwargs}...')
            wait_sec = 2 ** attempt
            print(f'Waiting {wait_sec} before trying again...')
            time.sleep(wait_sec)
        attempt += 1
    raise RuntimeError(f'Could not execute {func}, give up after {num_attempts} attempts...')
def safe_load(filename, device=None):
    """torch.load wrapped in filesystem-retry logic; maps to *device* when given."""
    load_kwargs = {} if device is None else {'map_location': device}
    return safe_filesystem_op(torch.load, filename, **load_kwargs)
def load_checkpoint(filename, device=None):
    """Load a checkpoint from *filename*, announcing the path first."""
    print(f"=> loading checkpoint '{filename}'")
    return safe_load(filename, device=device)
def print_actor_info(gym, env, actor_handle):
    """Print a debug summary of one actor: bodies, joints, DOFs and their states.

    Args:
        gym: Isaac Gym API object used for all queries.
        env: environment handle that contains the actor.
        actor_handle: handle of the actor to describe.
    """
    # Query the actor's structural metadata (names plus name->index dicts).
    name = gym.get_actor_name(env, actor_handle)
    body_names = gym.get_actor_rigid_body_names(env, actor_handle)
    body_dict = gym.get_actor_rigid_body_dict(env, actor_handle)
    joint_names = gym.get_actor_joint_names(env, actor_handle)
    joint_dict = gym.get_actor_joint_dict(env, actor_handle)
    dof_names = gym.get_actor_dof_names(env, actor_handle)
    dof_dict = gym.get_actor_dof_dict(env, actor_handle)
    print()
    print("===== Actor: %s =======================================" % name)
    print("\nBodies")
    print(body_names)
    print(body_dict)
    print("\nJoints")
    print(joint_names)
    print(joint_dict)
    print("\n Degrees Of Freedom (DOFs)")
    print(dof_names)
    print(dof_dict)
    print()
    # Get body state information
    body_states = gym.get_actor_rigid_body_states(
        env, actor_handle, gymapi.STATE_ALL)
    # Print some state slices
    print("Poses from Body State:")
    print(body_states['pose'])  # print just the poses
    print("\nVelocities from Body State:")
    print(body_states['vel'])  # print just the velocities
    print()
    # iterate through bodies and print name and position
    # NOTE(review): body_states appears to be a structured array indexed by
    # field name ('pose' -> 'p' for position) — confirm against the gym API.
    body_positions = body_states['pose']['p']
    for i in range(len(body_names)):
        print("Body '%s' has position" % body_names[i], body_positions[i])
    print("\nDOF states:")
    # get DOF states
    dof_states = gym.get_actor_dof_states(env, actor_handle, gymapi.STATE_ALL)
    # print some state slices
    # Print all states for each degree of freedom
    print(dof_states)
    print()
    # iterate through DOFs and print name and position
    dof_positions = dof_states['pos']
    for i in range(len(dof_names)):
        print("DOF '%s' has position" % dof_names[i], dof_positions[i])
def print_asset_info(asset, name, gym):
    """Print a summary of an asset: body, joint and DOF counts, names and types.

    Args:
        asset: Isaac Gym asset handle to inspect.
        name: label used in the printed header.
        gym: Isaac Gym API object used for the queries.

    Fixes: the original shadowed the builtin ``type`` and clobbered the
    ``name`` parameter inside the loops; distinct local names are used now.
    Printed output is unchanged.
    """
    print("======== Asset info %s: ========" % (name))
    num_bodies = gym.get_asset_rigid_body_count(asset)
    num_joints = gym.get_asset_joint_count(asset)
    num_dofs = gym.get_asset_dof_count(asset)
    print("Got %d bodies, %d joints, and %d DOFs" %
          (num_bodies, num_joints, num_dofs))
    # Iterate through bodies
    print("Bodies:")
    for i in range(num_bodies):
        body_name = gym.get_asset_rigid_body_name(asset, i)
        print(" %2d: '%s'" % (i, body_name))
    # Iterate through joints
    print("Joints:")
    for i in range(num_joints):
        joint_name = gym.get_asset_joint_name(asset, i)
        joint_type = gym.get_asset_joint_type(asset, i)
        type_name = gym.get_joint_type_string(joint_type)
        print(" %2d: '%s' (%s)" % (i, joint_name, type_name))
    # iterate through degrees of freedom (DOFs)
    print("DOFs:")
    for i in range(num_dofs):
        dof_name = gym.get_asset_dof_name(asset, i)
        dof_type = gym.get_asset_dof_type(asset, i)
        type_name = gym.get_dof_type_string(dof_type)
        print(" %2d: '%s' (%s)" % (i, dof_name, type_name))
# EOF
================================================
FILE: timechamber/utils/vec_task.py
================================================
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
from gym import spaces
from isaacgym import gymtorch
from isaacgym.torch_utils import to_torch
import torch
import numpy as np
# VecEnv Wrapper for RL training
class VecTask():
    """Vectorized-environment wrapper exposing gym-style spaces over a task.

    Observations/states are unbounded Box spaces; actions live in [-1, 1].
    Step/reset are abstract and implemented by the CPU/GPU/Python subclasses.
    """

    def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0):
        self.task = task
        self.num_environments = task.num_envs
        self.num_agents = 1  # used for multi-agent environments
        self.num_observations = task.num_obs
        self.num_states = task.num_states
        self.num_actions = task.num_actions

        # Unbounded observation/state spaces; actions are clipped to [-1, 1].
        self.obs_space = spaces.Box(np.full(self.num_obs, -np.Inf), np.full(self.num_obs, np.Inf))
        self.state_space = spaces.Box(np.full(self.num_states, -np.Inf), np.full(self.num_states, np.Inf))
        self.act_space = spaces.Box(np.full(self.num_actions, -1.), np.full(self.num_actions, 1.))

        self.clip_obs = clip_observations
        self.clip_actions = clip_actions
        self.rl_device = rl_device
        print("RL device: ", rl_device)

    def step(self, actions):
        raise NotImplementedError

    def reset(self):
        raise NotImplementedError

    def get_number_of_agents(self):
        return self.num_agents

    @property
    def observation_space(self):
        return self.obs_space

    @property
    def action_space(self):
        return self.act_space

    @property
    def num_envs(self):
        return self.num_environments

    @property
    def num_obs(self):
        return self.num_observations

    @property
    def num_acts(self):
        return self.num_actions
# C++ CPU Class
class VecTaskCPU(VecTask):
    """CPU-side C++ task wrapper: numpy arrays in, torch tensors out."""

    def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0):
        super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions)
        self.sync_frame_time = sync_frame_time

    def step(self, actions):
        np_actions = actions.cpu().numpy()
        self.task.render(self.sync_frame_time)
        obs, rewards, resets, extras = self.task.step(np.clip(np_actions, -self.clip_actions, self.clip_actions))
        clipped_obs = np.clip(obs, -self.clip_obs, self.clip_obs)
        return (to_torch(clipped_obs, dtype=torch.float, device=self.rl_device),
                to_torch(rewards, dtype=torch.float, device=self.rl_device),
                to_torch(resets, dtype=torch.uint8, device=self.rl_device), [])

    def reset(self):
        # Small random actions just to advance the sim and fill the obs buffer.
        actions = 0.01 * (1 - 2 * np.random.rand(self.num_envs, self.num_actions)).astype('f')
        # step the simulator
        obs, rewards, resets, extras = self.task.step(actions)
        return to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device)
# C++ GPU Class
class VecTaskGPU(VecTask):
    """GPU-side C++ task wrapper; shares obs/reward/reset buffers with the sim."""

    def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0):
        super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions)
        # Wrap the simulator-owned buffers as torch tensors (no copies).
        env_count = self.task.num_envs
        self.obs_tensor = gymtorch.wrap_tensor(self.task.obs_tensor, counts=(env_count, self.task.num_obs))
        self.rewards_tensor = gymtorch.wrap_tensor(self.task.rewards_tensor, counts=(env_count,))
        self.resets_tensor = gymtorch.wrap_tensor(self.task.resets_tensor, counts=(env_count,))

    def step(self, actions):
        self.task.render(False)
        clipped = torch.clamp(actions, -self.clip_actions, self.clip_actions)
        self.task.step(gymtorch.unwrap_tensor(clipped))
        return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs), self.rewards_tensor, self.resets_tensor, []

    def reset(self):
        # Small random actions just to advance the sim and fill the obs buffer.
        noise = torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)
        actions = 0.01 * (1 - 2 * noise)
        # step the simulator
        self.task.step(gymtorch.unwrap_tensor(actions))
        return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs)
# Python CPU/GPU Class
class VecTaskPython(VecTask):
    """Python task wrapper operating directly on the task's torch buffers."""

    def get_state(self):
        clamped = torch.clamp(self.task.states_buf, -self.clip_obs, self.clip_obs)
        return clamped.to(self.rl_device)

    def step(self, actions):
        self.task.step(torch.clamp(actions, -self.clip_actions, self.clip_actions))
        obs = torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)
        rewards = self.task.rew_buf.to(self.rl_device)
        resets = self.task.reset_buf.to(self.rl_device)
        return obs, rewards, resets, self.task.extras

    def reset(self):
        # Small random actions just to advance the sim and fill the obs buffer.
        noise = torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)
        self.task.step(0.01 * (1 - 2 * noise))
        return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)
================================================
FILE: timechamber/utils/vec_task_wrappers.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from gym import spaces
import numpy as np
import torch
from timechamber.utils.vec_task import VecTaskCPU, VecTaskGPU, VecTaskPython
class VecTaskCPUWrapper(VecTaskCPU):
    """Thin pass-through wrapper kept for interface symmetry with the other wrappers."""

    def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0):
        super().__init__(task, rl_device, sync_frame_time, clip_observations, clip_actions)
class VecTaskGPUWrapper(VecTaskGPU):
    """Thin pass-through wrapper kept for interface symmetry with the other wrappers."""

    def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0):
        super().__init__(task, rl_device, clip_observations, clip_actions)
class VecTaskPythonWrapper(VecTaskPython):
    """Python task wrapper that additionally exposes an AMP observation space."""

    def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0, AMP=False):
        super().__init__(task, rl_device, clip_observations, clip_actions)
        if AMP:
            # Unbounded Box over the task's AMP observation dimension.
            bound = np.ones(task.get_num_amp_obs()) * np.Inf
            self._amp_obs_space = spaces.Box(-bound, bound)
        else:
            self._amp_obs_space = None

    @property
    def amp_observation_space(self):
        return self._amp_obs_space

    def reset(self, env_ids=None):
        self.task.reset(env_ids)
        return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)

    def fetch_amp_obs_demo(self, num_samples):
        return self.task.fetch_amp_obs_demo(num_samples)