Repository: inspirai/TimeChamber Branch: main Commit: af3f3571c99a Files: 201 Total size: 120.5 MB Directory structure: gitextract_rvpupy7y/ ├── .gitattributes ├── .gitignore ├── LICENSE ├── LISENCE/ │ └── isaacgymenvs/ │ └── LICENSE ├── README.md ├── assets/ │ └── mjcf/ │ └── nv_ant.xml ├── docs/ │ └── environments.md ├── setup.py └── timechamber/ ├── __init__.py ├── ase/ │ ├── ase_agent.py │ ├── ase_models.py │ ├── ase_network_builder.py │ ├── ase_players.py │ ├── hrl_agent.py │ ├── hrl_models.py │ ├── hrl_network_builder.py │ ├── hrl_players.py │ └── utils/ │ ├── amp_agent.py │ ├── amp_datasets.py │ ├── amp_models.py │ ├── amp_network_builder.py │ ├── amp_players.py │ ├── common_agent.py │ ├── common_player.py │ └── replay_buffer.py ├── cfg/ │ ├── config.yaml │ ├── task/ │ │ ├── MA_Ant_Battle.yaml │ │ ├── MA_Ant_Sumo.yaml │ │ └── MA_Humanoid_Strike.yaml │ └── train/ │ ├── MA_Ant_BattlePPO.yaml │ ├── MA_Ant_SumoPPO.yaml │ ├── MA_Humanoid_StrikeHRL.yaml │ └── base/ │ └── ase_humanoid_hrl.yaml ├── learning/ │ ├── common_agent.py │ ├── common_player.py │ ├── hrl_sp_agent.py │ ├── hrl_sp_player.py │ ├── pfsp_player_pool.py │ ├── ppo_sp_agent.py │ ├── ppo_sp_player.py │ ├── replay_buffer.py │ ├── vectorized_models.py │ └── vectorized_network_builder.py ├── models/ │ ├── Humanoid_Strike/ │ │ ├── policy.pth │ │ └── policy_op.pth │ ├── ant_battle_2agents/ │ │ └── policy.pth │ ├── ant_battle_3agents/ │ │ └── policy.pth │ └── ant_sumo/ │ └── policy.pth ├── tasks/ │ ├── __init__.py │ ├── ase_humanoid_base/ │ │ ├── base_task.py │ │ ├── humanoid.py │ │ ├── humanoid_amp.py │ │ ├── humanoid_amp_task.py │ │ └── poselib/ │ │ ├── README.md │ │ ├── data/ │ │ │ ├── 01_01_cmu.fbx │ │ │ ├── 07_01_cmu.fbx │ │ │ ├── 08_02_cmu.fbx │ │ │ ├── 09_11_cmu.fbx │ │ │ ├── 49_08_cmu.fbx │ │ │ ├── 55_01_cmu.fbx │ │ │ ├── amp_humanoid_tpose.npy │ │ │ ├── cmu_tpose.npy │ │ │ ├── configs/ │ │ │ │ ├── retarget_cmu_to_amp.json │ │ │ │ └── retarget_sfu_to_amp.json │ │ │ └── sfu_tpose.npy │ │ ├── 
fbx_importer.py │ │ ├── generate_amp_humanoid_tpose.py │ │ ├── mjcf_importer.py │ │ ├── poselib/ │ │ │ ├── __init__.py │ │ │ ├── core/ │ │ │ │ ├── __init__.py │ │ │ │ ├── backend/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── abstract.py │ │ │ │ │ └── logger.py │ │ │ │ ├── rotation3d.py │ │ │ │ ├── tensor_utils.py │ │ │ │ └── tests/ │ │ │ │ ├── __init__.py │ │ │ │ └── test_rotation.py │ │ │ ├── skeleton/ │ │ │ │ ├── __init__.py │ │ │ │ ├── backend/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── fbx/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── fbx_backend.py │ │ │ │ │ └── fbx_read_wrapper.py │ │ │ │ └── skeleton3d.py │ │ │ └── visualization/ │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── core.py │ │ │ ├── plt_plotter.py │ │ │ ├── simple_plotter_tasks.py │ │ │ ├── skeleton_plotter_tasks.py │ │ │ └── tests/ │ │ │ ├── __init__.py │ │ │ └── test_plotter.py │ │ └── retarget_motion.py │ ├── base/ │ │ ├── __init__.py │ │ ├── ma_vec_task.py │ │ └── vec_task.py │ ├── data/ │ │ ├── assets/ │ │ │ └── mjcf/ │ │ │ └── amp_humanoid_sword_shield.xml │ │ ├── models/ │ │ │ └── llc_reallusion_sword_shield.pth │ │ └── motions/ │ │ └── reallusion_sword_shield/ │ │ ├── README.txt │ │ ├── RL_Avatar_Atk_2xCombo01_Motion.npy │ │ ├── RL_Avatar_Atk_2xCombo02_Motion.npy │ │ ├── RL_Avatar_Atk_2xCombo03_Motion.npy │ │ ├── RL_Avatar_Atk_2xCombo04_Motion.npy │ │ ├── RL_Avatar_Atk_2xCombo05_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo01_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo02_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo03_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo04_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo05_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo06_Motion.npy │ │ ├── RL_Avatar_Atk_3xCombo07_Motion.npy │ │ ├── RL_Avatar_Atk_4xCombo01_Motion.npy │ │ ├── RL_Avatar_Atk_4xCombo02_Motion.npy │ │ ├── RL_Avatar_Atk_4xCombo03_Motion.npy │ │ ├── RL_Avatar_Atk_Jump_Motion.npy │ │ ├── RL_Avatar_Atk_Kick_Motion.npy │ │ ├── RL_Avatar_Atk_ShieldCharge_Motion.npy │ │ ├── RL_Avatar_Atk_ShieldSwipe01_Motion.npy │ │ ├── 
RL_Avatar_Atk_ShieldSwipe02_Motion.npy │ │ ├── RL_Avatar_Atk_SlashDown_Motion.npy │ │ ├── RL_Avatar_Atk_SlashLeft_Motion.npy │ │ ├── RL_Avatar_Atk_SlashRight_Motion.npy │ │ ├── RL_Avatar_Atk_SlashUp_Motion.npy │ │ ├── RL_Avatar_Atk_Spin_Motion.npy │ │ ├── RL_Avatar_Atk_Stab_Motion.npy │ │ ├── RL_Avatar_Counter_Atk01_Motion.npy │ │ ├── RL_Avatar_Counter_Atk02_Motion.npy │ │ ├── RL_Avatar_Counter_Atk03_Motion.npy │ │ ├── RL_Avatar_Counter_Atk04_Motion.npy │ │ ├── RL_Avatar_Counter_Atk05_Motion.npy │ │ ├── RL_Avatar_Dodge_Backward_Motion.npy │ │ ├── RL_Avatar_Dodgle_Left_Motion.npy │ │ ├── RL_Avatar_Dodgle_Right_Motion.npy │ │ ├── RL_Avatar_Fall_Backward_Motion.npy │ │ ├── RL_Avatar_Fall_Left_Motion.npy │ │ ├── RL_Avatar_Fall_Right_Motion.npy │ │ ├── RL_Avatar_Fall_SpinLeft_Motion.npy │ │ ├── RL_Avatar_Fall_SpinRight_Motion.npy │ │ ├── RL_Avatar_Idle_Alert(0)_Motion.npy │ │ ├── RL_Avatar_Idle_Alert_Motion.npy │ │ ├── RL_Avatar_Idle_Battle(0)_Motion.npy │ │ ├── RL_Avatar_Idle_Battle_Motion.npy │ │ ├── RL_Avatar_Idle_Ready(0)_Motion.npy │ │ ├── RL_Avatar_Idle_Ready_Motion.npy │ │ ├── RL_Avatar_Kill_2xCombo01_Motion.npy │ │ ├── RL_Avatar_Kill_2xCombo02_Motion.npy │ │ ├── RL_Avatar_Kill_3xCombo01_Motion.npy │ │ ├── RL_Avatar_Kill_3xCombo02_Motion.npy │ │ ├── RL_Avatar_Kill_4xCombo01_Motion.npy │ │ ├── RL_Avatar_RunBackward_Motion.npy │ │ ├── RL_Avatar_RunForward_Motion.npy │ │ ├── RL_Avatar_RunLeft_Motion.npy │ │ ├── RL_Avatar_RunRight_Motion.npy │ │ ├── RL_Avatar_Shield_BlockBackward_Motion.npy │ │ ├── RL_Avatar_Shield_BlockCrouch_Motion.npy │ │ ├── RL_Avatar_Shield_BlockDown_Motion.npy │ │ ├── RL_Avatar_Shield_BlockLeft_Motion.npy │ │ ├── RL_Avatar_Shield_BlockRight_Motion.npy │ │ ├── RL_Avatar_Shield_BlockUp_Motion.npy │ │ ├── RL_Avatar_Standoff_Circle_Motion.npy │ │ ├── RL_Avatar_Standoff_Feint_Motion.npy │ │ ├── RL_Avatar_Standoff_Swing_Motion.npy │ │ ├── RL_Avatar_Sword_ParryBackward01_Motion.npy │ │ ├── RL_Avatar_Sword_ParryBackward02_Motion.npy │ │ ├── 
RL_Avatar_Sword_ParryBackward03_Motion.npy │ │ ├── RL_Avatar_Sword_ParryBackward04_Motion.npy │ │ ├── RL_Avatar_Sword_ParryCrouch_Motion.npy │ │ ├── RL_Avatar_Sword_ParryDown_Motion.npy │ │ ├── RL_Avatar_Sword_ParryLeft_Motion.npy │ │ ├── RL_Avatar_Sword_ParryRight_Motion.npy │ │ ├── RL_Avatar_Sword_ParryUp_Motion.npy │ │ ├── RL_Avatar_Taunt_PoundChest_Motion.npy │ │ ├── RL_Avatar_Taunt_Roar_Motion.npy │ │ ├── RL_Avatar_Taunt_ShieldKnock_Motion.npy │ │ ├── RL_Avatar_TurnLeft180_Motion.npy │ │ ├── RL_Avatar_TurnLeft90_Motion.npy │ │ ├── RL_Avatar_TurnRight180_Motion.npy │ │ ├── RL_Avatar_TurnRight90_Motion.npy │ │ ├── RL_Avatar_WalkBackward01_Motion.npy │ │ ├── RL_Avatar_WalkBackward02_Motion.npy │ │ ├── RL_Avatar_WalkForward01_Motion.npy │ │ ├── RL_Avatar_WalkForward02_Motion.npy │ │ ├── RL_Avatar_WalkLeft01_Motion.npy │ │ ├── RL_Avatar_WalkLeft02_Motion.npy │ │ ├── RL_Avatar_WalkRight01_Motion.npy │ │ ├── RL_Avatar_WalkRight02_Motion.npy │ │ └── dataset_reallusion_sword_shield.yaml │ ├── ma_ant_battle.py │ ├── ma_ant_sumo.py │ └── ma_humanoid_strike.py ├── train.py └── utils/ ├── config.py ├── gym_util.py ├── logger.py ├── motion_lib.py ├── reformat.py ├── rlgames_utils.py ├── torch_jit_utils.py ├── torch_utils.py ├── utils.py ├── vec_task.py └── vec_task_wrappers.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ ================================================ FILE: .gitignore ================================================ videos /timechamber/logs *train_dir* *ige_logs* *.egg-info /.vs /.vscode /_package /shaders ._tmptext.txt __pycache__/ /timechamber/tasks/__pycache__ /timechamber/utils/__pycache__ /timechamber/tasks/base/__pycache__ /tools/format/.lastrun *.pyc _doxygen /rlisaacgymenvsgpu/logs /timechamber/benchmarks/results /timechamber/simpletests/results *.pxd2 /tests/logs 
/timechamber/balance_bot.xml /timechamber/quadcopter.xml /timechamber/ingenuity.xml logs* nn/ runs/ .idea outputs/ *.hydra* /timechamber/wandb /test .gitlab ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2022 MIT Inspir.ai Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: LISENCE/isaacgymenvs/LICENSE ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. 
Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ # TimeChamber: A Massively Parallel Large Scale Self-Play Framework **** **TimeChamber** is a large scale self-play framework running on parallel simulation. Running self-play algorithms always need lots of hardware resources, especially on 3D physically simulated environments. We provide a self-play framework that can achieve fast training and evaluation with **ONLY ONE GPU**. TimeChamber is developed with the following key features: - **Parallel Simulation**: TimeChamber is built within [Isaac Gym](https://developer.nvidia.com/isaac-gym). Isaac Gym is a fast GPU-based simulation platform. 
It supports running thousands of environments in parallel on a single GPU.For example, on one NVIDIA Laptop RTX 3070Ti GPU, TimeChamber can reach **80,000+ mean FPS** by running 4,096 environments in parallel. - **Parallel Evaluation**: TimeChamber can fast calculate dozens of policies' ELO rating(represent their combat power). It also supports multi-player ELO calculations by [multi-elo](https://github.com/djcunningham0/multielo). Inspired by Vectorization techniques for [fast population-based training](https://github.com/instadeepai/fastpbrl), we leverage the vectorized models to evaluate different policy in parallel. - **Prioritized Fictitious Self-Play Benchmark**: We implement a classic PPO self-play algorithm on top of [rl_games](https://github.com/Denys88/rl_games), with a prioritized player pool to avoid cycles and improve the diversity of training policy.
- **Competitive Multi-Agent Tasks**: Inspired by [OpenAI RoboSumo](https://github.com/openai/robosumo) and [ASE](https://github.com/nv-tlabs/ASE), we introduce three competitive multi-agent tasks(e.g.,Ant Sumo,Ant Battle and Humanoid Strike) as examples. The efficiency of our self-play framework has been tested on these tasks. After days of training,our agent can discover some interesting physical skills like pulling, jumping,etc. **Welcome to contribute your own environments!** ## Installation **** Download and follow the installation instructions of Isaac Gym: https://developer.nvidia.com/isaac-gym Ensure that Isaac Gym works on your system by running one of the examples from the `python/examples` directory, like `joint_monkey.py`. If you have any trouble running the samples, please follow troubleshooting steps described in the [Isaac Gym Preview Release 3/4 installation instructions](https://developer.nvidia.com/isaac-gym). Then install this repo: ```bash pip install -e . ``` ## Quick Start **** ### Tasks Source code for tasks can be found in `timechamber/tasks`,The detailed settings of state/action/reward are in [here](./docs/environments.md). More interesting tasks will come soon. #### Humanoid Strike Humanoid Strike is a 3D environment with two simulated humanoid physics characters. Each character is equipped with a sword and shield with 37 degrees-of-freedom. The game will be restarted if one agent goes outside the arena. We measure how much the player damaged the opponent and how much the player was damaged by the opponent in the terminated step to determine the winner.
#### Ant Sumo

Ant Sumo is a 3D environment with simulated physics that allows pairs of ant agents to compete against each other. To win, the agent has to push the opponent out of the ring. Every agent has 100 hp. Each step, if the agent's body touches the ground, its hp will be reduced by 1. The agent whose hp becomes 0 will be eliminated.
#### Ant Battle

Ant Battle is an expanded environment of Ant Sumo. It supports more than two agents competing against each other. The battle ring radius shrinks over time, and any agent going out of the ring will be eliminated.
### Self-Play Training To train your policy for tasks, for example: ```bash # run self-play training for Humanoid Strike task python train.py task=MA_Humanoid_Strike headless=True ``` ```bash # run self-play training for Ant Sumo task python train.py task=MA_Ant_Sumo train=MA_Ant_SumoPPO headless=True ``` ```bash # run self-play training for Ant Battle task python train.py task=MA_Ant_Battle train=MA_Ant_BattlePPO headless=True ``` Key arguments to the training script follow [IsaacGymEnvs Configuration and command line arguments](https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/blob/main/README.md#configuration-and-command-line-arguments) . Other training arguments follow [rl_games config parameters](https://github.com/Denys88/rl_games#config-parameters), you can change them in `timechamber/tasks/train/*.yaml`. There are some specific arguments for self-play training: - `num_agents`: Set the number of agents for Ant Battle environment, it should be larger than 1. - `op_checkpoint`: Set to path to the checkpoint to load initial opponent agent policy. If it's empty, opponent agent will use random policy. - `update_win_rate`: Win_rate threshold to add the current policy to opponent's player pool. - `player_pool_length`: The max size of player pool, following FIFO rules. - `games_to_check`: Warm up for training, the player pool won't be updated until the current policy plays such number of games. - `max_update_steps`: If current policy update iterations exceed that number, the current policy will be added to opponent player_pool. 
### Policies Evaluation To evaluate your policies, for example: ```bash # run testing for Ant Sumo policy python train.py task=MA_Ant_Sumo train=MA_Ant_SumoPPO test=True num_envs=4 minibatch_size=32 headless=False checkpoint='models/ant_sumo/policy.pth' ``` ```bash # run testing for Humanoid Strike policy python train.py task=MA_Humanoid_Strike train=MA_Humanoid_StrikeHRL test=True num_envs=4 minibatch_size=32 headless=False checkpoint='models/Humanoid_Strike/policy.pth' op_checkpoint='models/Humanoid_Strike/policy_op.pth' ``` You can set the opponent agent policy using `op_checkpoint`. If it's empty, the opponent agent will use the same policy as `checkpoint`. We use vectorized models to accelerate the evaluation of policies. Put policies into checkpoint dir, let them compete with each other in parallel: ```bash # run testing for Ant Sumo policy python train.py task=MA_Ant_Sumo train=MA_Ant_SumoPPO test=True headless=True checkpoint='models/ant_sumo' player_pool_type=vectorized ``` There are some specific arguments for self-play evaluation, you can change them in `timechamber/tasks/train/*.yaml`: - `games_num`: Total episode number of evaluation. - `record_elo`: Set `True` to record the ELO rating of your policies, after evaluation, you can check the `elo.jpg` in your checkpoint dir.
- `init_elo`: Initial ELO rating of each policy. ### Building Your Own Task You can build your own task follow [IsaacGymEnvs](https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/blob/main/README.md#creating-an-environment) , make sure the obs shape is correct and`info` contains `win`,`lose`and`draw`: ```python import isaacgym import timechamber import torch envs = timechamber.make( seed=0, task="MA_Ant_Sumo", num_envs=2, sim_device="cuda:0", rl_device="cuda:0", ) # the obs shape should be (num_agents*num_envs,num_obs). # the obs of training agent is (:num_envs,num_obs) print("Observation space is", envs.observation_space) print("Action space is", envs.action_space) obs = envs.reset() for _ in range(20): obs, reward, done, info = envs.step( torch.rand((2 * 2,) + envs.action_space.shape, device="cuda:0") ) # info: # {'win': tensor([Bool, Bool]) # 'lose': tensor([Bool, Bool]) # 'draw': tensor([Bool, Bool])} ``` ## Citing If you use timechamber in your research please use the following citation: ```` @misc{InspirAI, author = {Huang Ziming, Ziyi Liu, Wu Yutong, Flood Sung}, title = {TimeChamber: A Massively Parallel Large Scale Self-Play Framework}, year = {2022}, publisher = {GitHub}, journal = {GitHub repository}, howpublished = {\url{https://github.com/inspirai/TimeChamber}}, } ================================================ FILE: assets/mjcf/nv_ant.xml ================================================ ================================================ FILE: docs/environments.md ================================================ ## Environments We provide a detailed description of the environment here. ### Humanoid Strike Humanoid Strike is a 3D environment with two simulated humanoid physics characters. Each character is equipped with a sword and shield with 37 degrees-of-freedom. The game will be restarted if one agent goes outside the arena or the game reaches the maximum episode steps. 
We measure how much the player damaged the opponent and how much the player was damaged by the opponent in the terminated step to determine the winner. #### Low-Level Observation Space | Index | Description | |:-------:|:------------------------------:| | 0 | Height of the root from the ground. | | 1 - 48 | Position of the body in the character’s local coordinate frame. | | 49 - 150 | Rotation of the body in the character’s local coordinate frame. | | 151 - 201 | Linear velocity of the root in the character’s local coordinate frame. | | 202 - 252 | angular velocity of the root in the character’s local coordinate frame. | #### High-Level Observation Space | Index | Description | |:-------:|:------------------------------:| | 0 - 1 | relative distance from the borderline | | 2 - 4 | relative distance from the opponent | | 5 - 10 | Rotation of the opponent's root in the character’s local coordinate frame. | | 11 - 13 | Linear velocity of the opponent'root in the character’s local coordinate frame. | | 14 - 16 | angular velocity of the opponent'root in the character’s local coordinate frame. | | 17 - 19 | relative distance between ego agent and opponent's sword | | 20 - 22 | Linear velocity of the opponent' sword in the character’s local coordinate frame. 
| | 23 - 25 | relative distance between ego agent' shield and opponent's sword | | 26 - 28 | relative velocity between ego agent' shield and opponent's sword | | 29 - 31 | relative distance between ego agent' sword and opponent's torse | | 32 - 34 | relative velocity between ego agent' sword and opponent's torse | | 35 - 37 | relative distance between ego agent' sword and opponent's head | | 38 - 40 | relative velocity between ego agent' sword and opponent's head | | 41 - 43 | relative distance between ego agent' sword and opponent's right arm | | 44 - 46 | relative distance between ego agent' sword and opponent's right thigh | | 47 - 49 | relative distance between ego agent' sword and opponent's left thigh | #### Low-Level Action Space | Index | Description | |:-----:|:-----------------:| | 0 - 30 | target rotations of each character’s joints | #### High-Level Action Space | Index | Description | |:-----:|:-----------------:| | 0 - 63 | latent skill variables | #### Rewards The weights of reward components are as follows: ```python op_fall_reward_w = 200.0 ego_fall_out_reward_w = 50.0 shield_to_sword_pos_reward_w = 1.0 damage_reward_w = 8.0 sword_to_op_reward_w = 0.8 reward_energy_w = 3.0 reward_strike_vel_acc_w = 3.0 reward_face_w = 4.0 reward_foot_to_op_w = 10.0 reward_kick_w = 2.0 ``` ### Ant Sumo Ant Sumo is a 3D environment with simulated physics that allows pairs of ant agents to compete against each other. To win, the agent has to push the opponent out of the ring. Every agent has 100 hp . Each step, If the agent's body touches the ground, its hp will be reduced by 1.The agent whose hp becomes 0 will be eliminated. 
#### Observation Space | Index | Description | |:-------:|:------------------------------:| | 0 - 2 | self pose | | 3 - 6 | self rotation | | 7 - 9 | self linear velocity | | 10 - 12 | self angle velocity | | 13 - 20 | self dof pos | | 21 - 28 | self dof velocity | | 29 - 31 | opponent pose | | 32 - 35 | opponent rotation | | 36 - 37 | self-opponent pose vector(x,y) | | 38 | is self body touch ground | | 39 | is opponent body touch ground | #### Action Space | Index | Description | |:-----:|:-----------------:| | 0 - 7 | self dof position | #### Rewards The reward consists of two parts:sparse reward and dense reward. ```python win_reward = 2000 lose_penalty = -2000 draw_penalty = -1000 dense_reward_scale = 1. dof_at_limit_cost = torch.sum(obs_buf[:, 13:21] > 0.99, dim=-1) * joints_at_limit_cost_scale push_reward = -push_scale * torch.exp(-torch.linalg.norm(obs_buf_op[:, :2], dim=-1)) action_cost_penalty = torch.sum(torch.square(torques), dim=1) * action_cost_scale not_move_penalty = -10 * torch.exp(-torch.sum(torch.abs(torques), dim=1)) dense_reward = move_reward + dof_at_limit_cost + push_reward + action_cost_penalty + not_move_penalty total_reward = win_reward + lose_penalty + draw_penalty + dense_reward * dense_reward_scale ``` ### Ant Battle Ant Battle is an expanded environment of Ant Sumo. It supports more than two agents competing against with each other. The battle ring radius will shrink, the agent going out of the ring will be eliminated. 
#### Observation Space | Index | Description | |:-------:|:--------------------------------------:| | 0 - 2 | self pose | | 3 - 6 | self rotation | | 7 - 9 | self linear velocity | | 10 - 12 | self angle velocity | | 13 - 20 | self dof pos | | 21 - 28 | self dof velocity | | 29 | border radius-self dis to centre | | 30 | border radius | | 31 | is self body touch ground | | 32 - 34 | opponent_1 pose | | 35 - 38 | opponent_1 rotation | | 39 - 40 | self-opponent_1 pose vector(x,y) | | 41 - 48 | opponent_1 dof pose | | 49 - 56 | opponent_1 dof velocity | | 57 | border radius-opponent_1 dis to centre | | 58 | is opponent_1 body touch ground | | ... | ... | #### Action Space | Index | Description | |:-----:|:-----------------:| | 0 - 7 | self dof position | #### Rewards The reward consists of two parts:sparse reward and dense reward. ```python win_reward_scale = 2000 reward_per_rank = 2 * win_reward_scale / (num_agents - 1) sparse_reward = sparse_reward * (win_reward_scale - (nxt_rank[:, 0] - 1) * reward_per_rank) stay_in_center_reward = stay_in_center_reward_scale * torch.exp(-torch.linalg.norm(obs[0, :, :2], dim=-1)) dof_at_limit_cost = torch.sum(obs[0, :, 13:21] > 0.99, dim=-1) * joints_at_limit_cost_scale action_cost_penalty = torch.sum(torch.square(torques), dim=1) * action_cost_scale not_move_penalty = torch.exp(-torch.sum(torch.abs(torques), dim=1)) dense_reward = dof_at_limit_cost + action_cost_penalty + not_move_penalty + stay_in_center_reward total_reward = sparse_reward + dense_reward * dense_reward_scale ``` ================================================ FILE: setup.py ================================================ """Installation script for the 'timechamber' python package.""" from __future__ import absolute_import from __future__ import print_function from __future__ import division from setuptools import setup, find_packages import os root_dir = os.path.dirname(os.path.realpath(__file__)) # Minimum dependencies required prior to installation 
INSTALL_REQUIRES = [
    # RL
    "gym==0.24",
    "torch",
    "omegaconf",
    "termcolor",
    "dill",
    "hydra-core>=1.1",
    "rl-games==1.5.2",
    "pyvirtualdisplay",
    # multi-player ELO rating calculation, pinned to a fixed commit
    "multielo @ git+https://github.com/djcunningham0/multielo.git@440f7922b90ff87009f8283d6491eb0f704e6624",
    "matplotlib==3.5.2",
    "pytest==7.1.2",
]

# Installation operation
setup(
    name="timechamber",
    author="ZeldaHuang, Ziyi Liu",
    version="0.0.1",
    description="A Massively Parallel Large Scale Self-Play Framework",
    keywords=["robotics", "rl"],
    include_package_data=True,
    python_requires=">=3.6.*",
    install_requires=INSTALL_REQUIRES,
    packages=find_packages("."),
    classifiers=["Natural Language :: English", "Programming Language :: Python :: 3.7, 3.8"],
    zip_safe=False,
)

# EOF

================================================
FILE: timechamber/__init__.py
================================================
import hydra
from hydra import compose, initialize
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig, OmegaConf

from timechamber.utils.reformat import omegaconf_to_dict

# Custom OmegaConf resolvers referenced by the YAML configs under ./cfg.
OmegaConf.register_new_resolver('eq', lambda x, y: x.lower() == y.lower())
OmegaConf.register_new_resolver('contains', lambda x, y: x.lower() in y.lower())
OmegaConf.register_new_resolver('if', lambda pred, a, b: a if pred else b)
# Falls back to `default` when the override argument is the empty string.
OmegaConf.register_new_resolver('resolve_default', lambda default, arg: default if arg == '' else arg)


def make(
        seed: int,
        task: str,
        num_envs: int,
        sim_device: str,
        rl_device: str,
        graphics_device_id: int = -1,
        device_type: str = "cuda",
        headless: bool = False,
        multi_gpu: bool = False,
        virtual_screen_capture: bool = False,
        force_render: bool = True,
        cfg: DictConfig = None
):
    """Create and return a TimeChamber task environment.

    When ``cfg`` is None, a Hydra config is composed from ``./cfg`` with the
    given ``task`` override; otherwise the supplied config is reused as-is.

    Args:
        seed: RNG seed forwarded to the env creator.
        task: Task name used as the Hydra ``task=`` override. Replaced by the
            currently active Hydra task choice if Hydra is already initialized.
        num_envs: Number of parallel environments; written into the task
            config as ``env.numEnvs`` (only in the compose-from-scratch path).
        sim_device: Device string for the physics simulation (e.g. "cuda:0").
        rl_device: Device string for RL tensors; also written into the task dict.
        graphics_device_id: Graphics device id (-1 presumably disables
            rendering -- confirm against get_rlgames_env_creator).
        device_type: "cuda" or "cpu".
        headless: Run without a viewer window.
        multi_gpu: Enable multi-GPU mode.
        virtual_screen_capture: Capture frames from a virtual display.
        force_render: Force rendering each step.
        cfg: Optional pre-built OmegaConf config to reuse instead of composing.

    Returns:
        The environment instance produced by calling the creator returned by
        ``get_rlgames_env_creator``.
    """
    from timechamber.utils.rlgames_utils import get_rlgames_env_creator

    # create hydra config if no config passed in
    if cfg is None:
        # reset current hydra config if already parsed (but not passed in here)
        if HydraConfig.initialized():
            task = HydraConfig.get().runtime.choices['task']
            hydra.core.global_hydra.GlobalHydra.instance().clear()
        with initialize(config_path="./cfg"):
            cfg = compose(config_name="config", overrides=[f"task={task}"])
            task_dict = omegaconf_to_dict(cfg.task)
            # apply the caller-requested parallel env count
            task_dict['env']['numEnvs'] = num_envs
    # reuse existing config
    else:
        task_dict = omegaconf_to_dict(cfg.task)

    task_dict['seed'] = cfg.seed
    task_dict['rl_device'] = rl_device
    # optional motion-capture data override (presumably for AMP/ASE-style
    # humanoid tasks -- confirm against the task configs)
    if cfg.motion_file:
        task_dict['env']['motion_file'] = cfg.motion_file

    create_rlgpu_env = get_rlgames_env_creator(
        seed=seed,
        cfg=cfg,
        task_config=task_dict,
        task_name=task_dict["name"],
        sim_device=sim_device,
        rl_device=rl_device,
        graphics_device_id=graphics_device_id,
        headless=headless,
        device_type=device_type,
        multi_gpu=multi_gpu,
        virtual_screen_capture=virtual_screen_capture,
        force_render=force_render,
    )
    # instantiate the env immediately and hand it back
    return create_rlgpu_env()

================================================
FILE: timechamber/ase/ase_agent.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import torch
import torch.nn as nn

from isaacgym.torch_utils import *

from rl_games.algos_torch import torch_ext
from rl_games.common import a2c_common
from rl_games.algos_torch.running_mean_std import RunningMeanStd

from timechamber.ase import ase_network_builder
from timechamber.ase.utils import amp_agent


class ASEAgent(amp_agent.AMPAgent):
    """AMP agent extended with ASE latent skill variables.

    Keeps a per-env latent vector (``self._ase_latents``) that conditions the
    policy and critic, records the latents alongside each transition in the
    experience buffer, and mixes stochastic and deterministic action selection
    through a per-env random-action probability mask.
    """

    def __init__(self, base_name, config):
        super().__init__(base_name, config)
        return

    def init_tensors(self):
        """Allocate ASE latent buffers on top of the base AMP experience tensors."""
        super().init_tensors()
        batch_shape = self.experience_buffer.obs_base_shape
        # per-step latents recorded with the rollout (horizon x envs x latent_dim)
        self.experience_buffer.tensor_dict['ase_latents'] = torch.zeros(batch_shape + (self._latent_dim,),
                                                                        dtype=torch.float32,
                                                                        device=self.ppo_device)
        # current latent for each env (batch_shape[-1] is the env count here)
        self._ase_latents = torch.zeros((batch_shape[-1], self._latent_dim), dtype=torch.float32,
                                        device=self.ppo_device)
        self.tensor_list += ['ase_latents']
        # step index at which each env's latent is next resampled
        self._latent_reset_steps = torch.zeros(batch_shape[-1], dtype=torch.int32, device=self.ppo_device)
        num_envs = self.vec_env.env.task.num_envs
        env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.ppo_device)
        self._reset_latent_step_count(env_ids)
        return

    def play_steps(self):
        """Roll out ``horizon_length`` env steps and build the training batch.

        Each step refreshes the latents, queries the policy conditioned on
        them, steps the env, and records obs/actions/rewards/latents into the
        experience buffer. Afterwards AMP-style rewards are combined with the
        task rewards and advantages/returns are computed.
        """
        self.set_eval()
        epinfos = []
        done_indices = []
        update_list = self.update_list
        for n in range(self.horizon_length):
            self.obs = self.env_reset(done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])
            # resample latents for envs whose latent lifetime has elapsed
            self._update_latents()
            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, self._ase_latents, masks)
            else:
                res_dict = self.get_action_values(self.obs, self._ase_latents, self._rand_action_probs)
            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])
            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])
            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
            shaped_rewards = self.rewards_shaper(rewards)
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)
            self.experience_buffer.update_data('amp_obs', n, infos['amp_obs'])
            self.experience_buffer.update_data('ase_latents', n, self._ase_latents)
            self.experience_buffer.update_data('rand_action_mask', n, res_dict['rand_action_mask'])
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            # bootstrap value is zeroed for terminated envs
            next_vals = self._eval_critic(self.obs, self._ase_latents)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)
            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            # one entry per (multi-agent) env group
            done_indices = all_done_indices[::self.num_agents]
            self.game_rewards.update(self.current_rewards[done_indices])
            self.game_lengths.update(self.current_lengths[done_indices])
            self.algo_observer.process_infos(infos, done_indices)
            not_dones = 1.0 - self.dones.float()
            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones
            if (self.vec_env.env.task.viewer):
                self._amp_debug(infos, self._ase_latents)
            done_indices = done_indices[:, 0]

        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        mb_amp_obs = self.experience_buffer.tensor_dict['amp_obs']
        mb_ase_latents = self.experience_buffer.tensor_dict['ase_latents']
        # style/discriminator rewards conditioned on the latents, merged with task rewards
        amp_rewards = self._calc_amp_rewards(mb_amp_obs, mb_ase_latents)
        mb_rewards = self._combine_rewards(mb_rewards, amp_rewards)
        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values
        batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size
        for k, v in amp_rewards.items():
            batch_dict[k] = a2c_common.swap_and_flatten01(v)
        return batch_dict

    def get_action_values(self, obs_dict, ase_latents, rand_action_probs):
        """Query the policy conditioned on ``ase_latents``.

        ``rand_action_probs`` gives each env's probability of keeping the
        sampled (stochastic) action; envs drawn as 0 fall back to the
        deterministic mean action (``mus``). The chosen mask is returned in
        ``res_dict['rand_action_mask']``.
        """
        processed_obs = self._preproc_obs(obs_dict['obs'])
        self.model.eval()
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs': processed_obs,
            'rnn_states': self.rnn_states,
            'ase_latents': ase_latents
        }
        with torch.no_grad():
            res_dict = self.model(input_dict)
            if self.has_central_value:
                states = obs_dict['states']
                input_dict = {
                    'is_train': False,
                    'states': states,
                }
                value = self.get_central_value(input_dict)
                res_dict['values'] = value
        if self.normalize_value:
            res_dict['values'] = self.value_mean_std(res_dict['values'], True)
        rand_action_mask = torch.bernoulli(rand_action_probs)
        det_action_mask = rand_action_mask == 0.0
        # replace sampled actions with the deterministic means where masked out
        res_dict['actions'][det_action_mask] = res_dict['mus'][det_action_mask]
        res_dict['rand_action_mask'] = rand_action_mask
        return res_dict

    def prepare_dataset(self, batch_dict):
        """Extend the base dataset with the rollout's ASE latents."""
        super().prepare_dataset(batch_dict)
        ase_latents = batch_dict['ase_latents']
        self.dataset.values_dict['ase_latents'] = ase_latents
        return

    def calc_gradients(self, input_dict):
        """Compute PPO + AMP/ASE losses for one minibatch.

        NOTE(review): this method continues beyond the visible chunk; only the
        prefix shown here is documented.
        """
        self.set_train()
        value_preds_batch = input_dict['old_values']
        old_action_log_probs_batch = input_dict['old_logp_actions']
        advantage = input_dict['advantages']
        old_mu_batch = input_dict['mu']
        old_sigma_batch = input_dict['sigma']
        return_batch = input_dict['returns']
        actions_batch = input_dict['actions']
        obs_batch = input_dict['obs']
        obs_batch = self._preproc_obs(obs_batch)
        # discriminator/encoder inputs are truncated to the AMP minibatch size
        amp_obs = input_dict['amp_obs'][0:self._amp_minibatch_size]
        amp_obs = self._preproc_amp_obs(amp_obs)
        if (self._enable_enc_grad_penalty()):
            # gradients w.r.t. the encoder input are needed for the penalty term
            amp_obs.requires_grad_(True)
        amp_obs_replay = input_dict['amp_obs_replay'][0:self._amp_minibatch_size]
        amp_obs_replay = self._preproc_amp_obs(amp_obs_replay)
        amp_obs_demo = input_dict['amp_obs_demo'][0:self._amp_minibatch_size]
        amp_obs_demo = self._preproc_amp_obs(amp_obs_demo)
        amp_obs_demo.requires_grad_(True)
        ase_latents = input_dict['ase_latents']
        rand_action_mask = input_dict['rand_action_mask']
        rand_action_sum = torch.sum(rand_action_mask)
        lr = self.last_lr
        kl = 1.0
        lr_mul = 1.0
        curr_e_clip = lr_mul * self.e_clip
        batch_dict = {
            'is_train': True,
            'prev_actions': actions_batch,
            'obs': obs_batch,
            'amp_obs': amp_obs,
            'amp_obs_replay': amp_obs_replay,
            'amp_obs_demo': amp_obs_demo,
            'ase_latents': ase_latents
        }
        rnn_masks = None
        if self.is_rnn:
            rnn_masks = input_dict['rnn_masks']
            batch_dict['rnn_states'] = input_dict['rnn_states']
            batch_dict['seq_length'] = self.seq_len
        # NOTE(review): the rnn_masks block above is repeated verbatim below;
        # the duplicate looks redundant -- confirm against upstream ASE code.
        rnn_masks = None
        if self.is_rnn:
            rnn_masks = input_dict['rnn_masks']
            batch_dict['rnn_states'] = input_dict['rnn_states']
            batch_dict['seq_length'] = self.seq_len
        with torch.cuda.amp.autocast(enabled=self.mixed_precision):
            res_dict = self.model(batch_dict)
            action_log_probs = res_dict['prev_neglogp']
            values = res_dict['values']
            entropy = res_dict['entropy']
            mu = res_dict['mus']
            sigma = res_dict['sigmas']
            disc_agent_logit = res_dict['disc_agent_logit']
            disc_agent_replay_logit = res_dict['disc_agent_replay_logit']
            disc_demo_logit = res_dict['disc_demo_logit']
            enc_pred = res_dict['enc_pred']
            a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip)
            a_loss = a_info['actor_loss']
            a_clipped = a_info['actor_clipped'].float()
            c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value)
            c_loss = c_info['critic_loss']
            b_loss = self.bound_loss(mu)
            c_loss = 
torch.mean(c_loss) a_loss = torch.sum(rand_action_mask * a_loss) / rand_action_sum entropy = torch.sum(rand_action_mask * entropy) / rand_action_sum b_loss = torch.sum(rand_action_mask * b_loss) / rand_action_sum a_clip_frac = torch.sum(rand_action_mask * a_clipped) / rand_action_sum disc_agent_cat_logit = torch.cat([disc_agent_logit, disc_agent_replay_logit], dim=0) disc_info = self._disc_loss(disc_agent_cat_logit, disc_demo_logit, amp_obs_demo) disc_loss = disc_info['disc_loss'] enc_latents = batch_dict['ase_latents'][0:self._amp_minibatch_size] enc_loss_mask = rand_action_mask[0:self._amp_minibatch_size] enc_info = self._enc_loss(enc_pred, enc_latents, batch_dict['amp_obs'], enc_loss_mask) enc_loss = enc_info['enc_loss'] loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss \ + self._disc_coef * disc_loss + self._enc_coef * enc_loss if (self._enable_amp_diversity_bonus()): diversity_loss = self._diversity_loss(batch_dict['obs'], mu, batch_dict['ase_latents']) diversity_loss = torch.sum(rand_action_mask * diversity_loss) / rand_action_sum loss += self._amp_diversity_bonus * diversity_loss a_info['amp_diversity_loss'] = diversity_loss a_info['actor_loss'] = a_loss a_info['actor_clip_frac'] = a_clip_frac c_info['critic_loss'] = c_loss if self.multi_gpu: self.optimizer.zero_grad() else: for param in self.model.parameters(): param.grad = None self.scaler.scale(loss).backward() #TODO: Refactor this ugliest code of the year if self.truncate_grads: if self.multi_gpu: self.optimizer.synchronize() self.scaler.unscale_(self.optimizer) nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm) with self.optimizer.skip_synchronize(): self.scaler.step(self.optimizer) self.scaler.update() else: self.scaler.unscale_(self.optimizer) nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm) self.scaler.step(self.optimizer) self.scaler.update() else: self.scaler.step(self.optimizer) self.scaler.update() with 
torch.no_grad(): reduce_kl = not self.is_rnn kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl) if self.is_rnn: kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel() #/ sum_mask self.train_result = { 'entropy': entropy, 'kl': kl_dist, 'last_lr': self.last_lr, 'lr_mul': lr_mul, 'b_loss': b_loss } self.train_result.update(a_info) self.train_result.update(c_info) self.train_result.update(disc_info) self.train_result.update(enc_info) return def env_reset(self, env_ids=None): obs = super().env_reset(env_ids) if (env_ids is None): num_envs = self.vec_env.env.task.num_envs env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.ppo_device) if (len(env_ids) > 0): self._reset_latents(env_ids) self._reset_latent_step_count(env_ids) return obs def _reset_latent_step_count(self, env_ids): self._latent_reset_steps[env_ids] = torch.randint_like(self._latent_reset_steps[env_ids], low=self._latent_steps_min, high=self._latent_steps_max) return def _load_config_params(self, config): super()._load_config_params(config) self._latent_dim = config['latent_dim'] self._latent_steps_min = config.get('latent_steps_min', np.inf) self._latent_steps_max = config.get('latent_steps_max', np.inf) self._latent_dim = config['latent_dim'] self._amp_diversity_bonus = config['amp_diversity_bonus'] self._amp_diversity_tar = config['amp_diversity_tar'] self._enc_coef = config['enc_coef'] self._enc_weight_decay = config['enc_weight_decay'] self._enc_reward_scale = config['enc_reward_scale'] self._enc_grad_penalty = config['enc_grad_penalty'] self._enc_reward_w = config['enc_reward_w'] return def _build_net_config(self): config = super()._build_net_config() config['ase_latent_shape'] = (self._latent_dim,) return config def _reset_latents(self, env_ids): n = len(env_ids) z = self._sample_latents(n) self._ase_latents[env_ids] = z if (self.vec_env.env.task.viewer): self._change_char_color(env_ids) return def _sample_latents(self, n): z = 
self.model.a2c_network.sample_latents(n) return z def _update_latents(self): new_latent_envs = self._latent_reset_steps <= self.vec_env.env.task.progress_buf need_update = torch.any(new_latent_envs) if (need_update): new_latent_env_ids = new_latent_envs.nonzero(as_tuple=False).flatten() self._reset_latents(new_latent_env_ids) self._latent_reset_steps[new_latent_env_ids] += torch.randint_like(self._latent_reset_steps[new_latent_env_ids], low=self._latent_steps_min, high=self._latent_steps_max) if (self.vec_env.env.task.viewer): self._change_char_color(new_latent_env_ids) return def _eval_actor(self, obs, ase_latents): output = self.model.eval_actor(obs=obs, ase_latents=ase_latents) return output def _eval_critic(self, obs_dict, ase_latents): self.model.eval() obs = obs_dict['obs'] processed_obs = self._preproc_obs(obs) value = self.model.eval_critic(processed_obs, ase_latents) if self.normalize_value: value = self.value_mean_std(value, True) return value def _calc_amp_rewards(self, amp_obs, ase_latents): disc_r = self._calc_disc_rewards(amp_obs) enc_r = self._calc_enc_rewards(amp_obs, ase_latents) output = { 'disc_rewards': disc_r, 'enc_rewards': enc_r } return output def _calc_enc_rewards(self, amp_obs, ase_latents): with torch.no_grad(): enc_pred = self._eval_enc(amp_obs) err = self._calc_enc_error(enc_pred, ase_latents) enc_r = torch.clamp_min(-err, 0.0) enc_r *= self._enc_reward_scale return enc_r def _enc_loss(self, enc_pred, ase_latent, enc_obs, loss_mask): enc_err = self._calc_enc_error(enc_pred, ase_latent) #mask_sum = torch.sum(loss_mask) #enc_err = enc_err.squeeze(-1) #enc_loss = torch.sum(loss_mask * enc_err) / mask_sum enc_loss = torch.mean(enc_err) # weight decay if (self._enc_weight_decay != 0): enc_weights = self.model.a2c_network.get_enc_weights() enc_weights = torch.cat(enc_weights, dim=-1) enc_weight_decay = torch.sum(torch.square(enc_weights)) enc_loss += self._enc_weight_decay * enc_weight_decay enc_info = { 'enc_loss': enc_loss } if 
(self._enable_enc_grad_penalty()): enc_obs_grad = torch.autograd.grad(enc_err, enc_obs, grad_outputs=torch.ones_like(enc_err), create_graph=True, retain_graph=True, only_inputs=True) enc_obs_grad = enc_obs_grad[0] enc_obs_grad = torch.sum(torch.square(enc_obs_grad), dim=-1) #enc_grad_penalty = torch.sum(loss_mask * enc_obs_grad) / mask_sum enc_grad_penalty = torch.mean(enc_obs_grad) enc_loss += self._enc_grad_penalty * enc_grad_penalty enc_info['enc_grad_penalty'] = enc_grad_penalty.detach() return enc_info def _diversity_loss(self, obs, action_params, ase_latents): assert(self.model.a2c_network.is_continuous) n = obs.shape[0] assert(n == action_params.shape[0]) new_z = self._sample_latents(n) mu, sigma = self._eval_actor(obs=obs, ase_latents=new_z) clipped_action_params = torch.clamp(action_params, -1.0, 1.0) clipped_mu = torch.clamp(mu, -1.0, 1.0) a_diff = clipped_action_params - clipped_mu a_diff = torch.mean(torch.square(a_diff), dim=-1) z_diff = new_z * ase_latents z_diff = torch.sum(z_diff, dim=-1) z_diff = 0.5 - 0.5 * z_diff diversity_bonus = a_diff / (z_diff + 1e-5) diversity_loss = torch.square(self._amp_diversity_tar - diversity_bonus) return diversity_loss def _calc_enc_error(self, enc_pred, ase_latent): err = enc_pred * ase_latent err = -torch.sum(err, dim=-1, keepdim=True) return err def _enable_enc_grad_penalty(self): return self._enc_grad_penalty != 0 def _enable_amp_diversity_bonus(self): return self._amp_diversity_bonus != 0 def _eval_enc(self, amp_obs): proc_amp_obs = self._preproc_amp_obs(amp_obs) return self.model.a2c_network.eval_enc(proc_amp_obs) def _combine_rewards(self, task_rewards, amp_rewards): disc_r = amp_rewards['disc_rewards'] enc_r = amp_rewards['enc_rewards'] combined_rewards = self._task_reward_w * task_rewards \ + self._disc_reward_w * disc_r \ + self._enc_reward_w * enc_r return combined_rewards def _record_train_batch_info(self, batch_dict, train_info): super()._record_train_batch_info(batch_dict, train_info) 
train_info['enc_rewards'] = batch_dict['enc_rewards'] return def _log_train_info(self, train_info, frame): super()._log_train_info(train_info, frame) self.writer.add_scalar('losses/enc_loss', torch_ext.mean_list(train_info['enc_loss']).item(), frame) if (self._enable_amp_diversity_bonus()): self.writer.add_scalar('losses/amp_diversity_loss', torch_ext.mean_list(train_info['amp_diversity_loss']).item(), frame) enc_reward_std, enc_reward_mean = torch.std_mean(train_info['enc_rewards']) self.writer.add_scalar('info/enc_reward_mean', enc_reward_mean.item(), frame) self.writer.add_scalar('info/enc_reward_std', enc_reward_std.item(), frame) if (self._enable_enc_grad_penalty()): self.writer.add_scalar('info/enc_grad_penalty', torch_ext.mean_list(train_info['enc_grad_penalty']).item(), frame) return def _change_char_color(self, env_ids): base_col = np.array([0.4, 0.4, 0.4]) range_col = np.array([0.0706, 0.149, 0.2863]) range_sum = np.linalg.norm(range_col) rand_col = np.random.uniform(0.0, 1.0, size=3) rand_col = range_sum * rand_col / np.linalg.norm(rand_col) rand_col += base_col self.vec_env.env.task.set_char_color(rand_col, env_ids) return def _amp_debug(self, info, ase_latents): with torch.no_grad(): amp_obs = info['amp_obs'] amp_obs = amp_obs ase_latents = ase_latents disc_pred = self._eval_disc(amp_obs) amp_rewards = self._calc_amp_rewards(amp_obs, ase_latents) disc_reward = amp_rewards['disc_rewards'] enc_reward = amp_rewards['enc_rewards'] disc_pred = disc_pred.detach().cpu().numpy()[0, 0] disc_reward = disc_reward.cpu().numpy()[0, 0] enc_reward = enc_reward.cpu().numpy()[0, 0] print("disc_pred: ", disc_pred, disc_reward, enc_reward) return ================================================ FILE: timechamber/ase/ase_models.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from timechamber.ase.utils import amp_models


class ModelASEContinuous(amp_models.ModelAMPContinuous):
    """AMP continuous-action model extended with an ASE skill encoder head."""

    def __init__(self, network):
        super().__init__(network)
        return

    def build(self, config):
        """Build the 'ase' network and wrap it in this model's Network class."""
        net = self.network_builder.build('ase', **config)
        # Debug: list parameter names of the freshly built network.
        for name, _ in net.named_parameters():
            print(name)
        # print(f"ASE config: {config}")
        obs_shape = config['input_shape']
        normalize_value = config.get('normalize_value', False)
        normalize_input = config.get('normalize_input', False)
        value_size = config.get('value_size', 1)
        return ModelASEContinuous.Network(net, obs_shape=obs_shape, normalize_value=normalize_value,
                                          normalize_input=normalize_input, value_size=value_size)

    class Network(amp_models.ModelAMPContinuous.Network):
        """Runtime wrapper adding encoder predictions and latent-conditioned eval."""

        def __init__(self, a2c_network, obs_shape, normalize_value, normalize_input, value_size):
            super().__init__(a2c_network, obs_shape=obs_shape, normalize_value=normalize_value,
                             normalize_input=normalize_input, value_size=value_size)
            return

        def forward(self, input_dict):
            """Standard AMP forward; in training mode also returns 'enc_pred'."""
            is_train = input_dict.get('is_train', True)
            result = super().forward(input_dict)

            if (is_train):
                amp_obs = input_dict['amp_obs']
                enc_pred = self.a2c_network.eval_enc(amp_obs)
                result["enc_pred"] = enc_pred

            return result

        def eval_actor(self, obs, ase_latents, use_hidden_latents=False):
            """Normalize obs and evaluate the latent-conditioned actor head."""
            processed_obs = self.norm_obs(obs)
            mu, sigma = self.a2c_network.eval_actor(obs=processed_obs, ase_latents=ase_latents)
            return mu, sigma

        def eval_critic(self, obs, ase_latents, use_hidden_latents=False):
            """Normalize obs and evaluate the latent-conditioned critic head."""
            processed_obs = self.norm_obs(obs)
            value = self.a2c_network.eval_critic(processed_obs, ase_latents, use_hidden_latents)
            return value


================================================
FILE: timechamber/ase/ase_network_builder.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1.
Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import layers
from rl_games.algos_torch import network_builder

import torch
import torch.nn as nn
import numpy as np
import enum

from timechamber.ase.utils import amp_network_builder

# Encoder output layer weights are initialized uniformly in this range.
ENC_LOGIT_INIT_SCALE = 0.1


class LatentType(enum.Enum):
    # Supported latent prior shapes (only 'sphere' sampling is implemented below).
    uniform = 0
    sphere = 1


class ASEBuilder(amp_network_builder.AMPBuilder):
    """AMP network builder extended with a latent-conditioned actor/critic
    and an ASE skill encoder head."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        return

    class Network(amp_network_builder.AMPBuilder.Network):
        def __init__(self, params, **kwargs):
            actions_num = kwargs.get('actions_num')
            input_shape = kwargs.get('input_shape')
            self.value_size = kwargs.get('value_size', 1)
            self.num_seqs = num_seqs = kwargs.get('num_seqs', 1)
            amp_input_shape = kwargs.get('amp_input_shape')
            self._ase_latent_shape = kwargs.get('ase_latent_shape')

            # Skip AMPBuilder.Network.__init__ and build everything here,
            # since the actor/critic trunks must take the latent as input.
            network_builder.NetworkBuilder.BaseNetwork.__init__(self)
            self.load(params)

            actor_out_size, critic_out_size = self._build_actor_critic_net(input_shape, self._ase_latent_shape)

            self.value = torch.nn.Linear(critic_out_size, self.value_size)
            self.value_act = self.activations_factory.create(self.value_activation)

            if self.is_discrete:
                self.logits = torch.nn.Linear(actor_out_size, actions_num)
            '''
                for multidiscrete actions num is a tuple
            '''
            if self.is_multi_discrete:
                self.logits = torch.nn.ModuleList([torch.nn.Linear(actor_out_size, num) for num in actions_num])
            if self.is_continuous:
                self.mu = torch.nn.Linear(actor_out_size, actions_num)
                self.mu_act = self.activations_factory.create(self.space_config['mu_activation'])
                mu_init = self.init_factory.create(**self.space_config['mu_init'])
                self.sigma_act = self.activations_factory.create(self.space_config['sigma_activation'])
                sigma_init = self.init_factory.create(**self.space_config['sigma_init'])

                if (not self.space_config['learn_sigma']):
                    self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32),
                                              requires_grad=False)
                elif self.space_config['fixed_sigma']:
                    self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32),
                                              requires_grad=True)
                else:
                    self.sigma = torch.nn.Linear(actor_out_size, actions_num)

            mlp_init = self.init_factory.create(**self.initializer)
            if self.has_cnn:
                cnn_init = self.init_factory.create(**self.cnn['initializer'])

            for m in self.modules():
                if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d):
                    cnn_init(m.weight)
                    if getattr(m, "bias", None) is not None:
                        torch.nn.init.zeros_(m.bias)
                if isinstance(m, nn.Linear):
                    mlp_init(m.weight)
                    if getattr(m, "bias", None) is not None:
                        torch.nn.init.zeros_(m.bias)

            # Re-run the trunks' own initializers (the generic loop above
            # already touched their Linear layers).
            self.actor_mlp.init_params()
            self.critic_mlp.init_params()

            if self.is_continuous:
                mu_init(self.mu.weight)
                if self.space_config['fixed_sigma']:
                    sigma_init(self.sigma)
                else:
                    sigma_init(self.sigma.weight)

            self._build_disc(amp_input_shape)
            self._build_enc(amp_input_shape)

            return

        def load(self, params):
            """Read encoder-specific settings in addition to the AMP params."""
            super().load(params)

            self._enc_units = params['enc']['units']
            self._enc_activation = params['enc']['activation']
            self._enc_initializer = params['enc']['initializer']
            self._enc_separate = params['enc']['separate']

            return

        def forward(self, obs_dict):
            # NOTE(review): tuple concatenation assumes eval_actor returns a
            # tuple (continuous case: (mu, sigma)) — confirm for discrete use.
            obs = obs_dict['obs']
            ase_latents = obs_dict['ase_latents']
            states = obs_dict.get('rnn_states', None)
            use_hidden_latents = obs_dict.get('use_hidden_latents', False)

            actor_outputs = self.eval_actor(obs, ase_latents, use_hidden_latents)
            value = self.eval_critic(obs, ase_latents, use_hidden_latents)

            output = actor_outputs + (value, states)

            return output

        def eval_critic(self, obs, ase_latents, use_hidden_latents=False):
            """Critic value for (obs, latent)."""
            c_out = self.critic_cnn(obs)
            c_out = c_out.contiguous().view(c_out.size(0), -1)

            c_out = self.critic_mlp(c_out, ase_latents, use_hidden_latents)
            value = self.value_act(self.value(c_out))
            return value

        def eval_actor(self, obs, ase_latents, use_hidden_latents=False):
            """Actor outputs for (obs, latent): logits, logit list, or (mu, sigma)."""
            a_out = self.actor_cnn(obs)
            a_out = a_out.contiguous().view(a_out.size(0), -1)
            a_out = self.actor_mlp(a_out, ase_latents, use_hidden_latents)

            if self.is_discrete:
                logits = self.logits(a_out)
                return logits

            if self.is_multi_discrete:
                logits = [logit(a_out) for logit in self.logits]
                return logits

            if self.is_continuous:
                mu = self.mu_act(self.mu(a_out))
                if self.space_config['fixed_sigma']:
                    sigma = mu * 0.0 + self.sigma_act(self.sigma)
                else:
                    sigma = self.sigma_act(self.sigma(a_out))

                return mu, sigma
            return

        def get_enc_weights(self):
            """Flattened weights of the encoder MLP + output layer (for weight decay)."""
            weights = []
            for m in self._enc_mlp.modules():
                if isinstance(m, nn.Linear):
                    weights.append(torch.flatten(m.weight))

            weights.append(torch.flatten(self._enc.weight))
            return weights

        def _build_actor_critic_net(self, input_shape, ase_latent_shape):
            """Build the latent-conditioned actor (style-cat) and critic trunks."""
            style_units = [512, 256]
            style_dim = ase_latent_shape[-1]

            self.actor_cnn = nn.Sequential()
            self.critic_cnn = nn.Sequential()

            act_fn = self.activations_factory.create(self.activation)
            initializer = self.init_factory.create(**self.initializer)

            self.actor_mlp = AMPStyleCatNet1(obs_size=input_shape[-1],
                                             ase_latent_size=ase_latent_shape[-1],
                                             units=self.units,
                                             activation=act_fn,
                                             style_units=style_units,
                                             style_dim=style_dim,
                                             initializer=initializer)

            if self.separate:
                self.critic_mlp = AMPMLPNet(obs_size=input_shape[-1],
                                            ase_latent_size=ase_latent_shape[-1],
                                            units=self.units,
                                            activation=act_fn,
                                            initializer=initializer)

            actor_out_size = self.actor_mlp.get_out_size()
            critic_out_size = self.critic_mlp.get_out_size()

            return actor_out_size, critic_out_size

        def _build_enc(self, input_shape):
            """Build the encoder head, optionally sharing the disc's trunk."""
            if (self._enc_separate):
                self._enc_mlp = nn.Sequential()
                mlp_args = {
                    'input_size': input_shape[0],
                    'units': self._enc_units,
                    'activation': self._enc_activation,
                    'dense_func': torch.nn.Linear
                }
                self._enc_mlp = self._build_mlp(**mlp_args)

                mlp_init = self.init_factory.create(**self._enc_initializer)
                for m in self._enc_mlp.modules():
                    if isinstance(m, nn.Linear):
                        mlp_init(m.weight)
                        if getattr(m, "bias", None) is not None:
                            torch.nn.init.zeros_(m.bias)
            else:
                # Share the discriminator trunk.
                self._enc_mlp = self._disc_mlp

            # Second-to-last module of the (Linear, activation, ...) sequence
            # is the final Linear; its width feeds the encoder output layer.
            mlp_out_layer = list(self._enc_mlp.modules())[-2]
            mlp_out_size = mlp_out_layer.out_features
            self._enc = torch.nn.Linear(mlp_out_size, self._ase_latent_shape[-1])

            torch.nn.init.uniform_(self._enc.weight, -ENC_LOGIT_INIT_SCALE, ENC_LOGIT_INIT_SCALE)
            torch.nn.init.zeros_(self._enc.bias)

            return

        def eval_enc(self, amp_obs):
            """Encoder prediction, L2-normalized onto the unit hypersphere."""
            enc_mlp_out = self._enc_mlp(amp_obs)
            enc_output = self._enc(enc_mlp_out)
            enc_output = torch.nn.functional.normalize(enc_output, dim=-1)
            return enc_output

        def sample_latents(self, n):
            """Sample n latents uniformly on the unit hypersphere
            (normalized Gaussian draws)."""
            device = next(self._enc.parameters()).device
            z = torch.normal(torch.zeros([n, self._ase_latent_shape[-1]], device=device))
            z = torch.nn.functional.normalize(z, dim=-1)
            return z

    def build(self, name, **kwargs):
        net = ASEBuilder.Network(self.params, **kwargs)
        return net


class AMPMLPNet(torch.nn.Module):
    """Plain MLP over the concatenation of observation and latent."""

    def __init__(self, obs_size, ase_latent_size, units, activation, initializer):
        super().__init__()

        input_size = obs_size + ase_latent_size
        print('build amp mlp net:', input_size)

        self._units = units
        self._initializer = initializer
        self._mlp = []

        in_size = input_size
        for i in range(len(units)):
            unit = units[i]
            curr_dense = torch.nn.Linear(in_size, unit)
            self._mlp.append(curr_dense)
            self._mlp.append(activation)
            in_size = unit

        self._mlp = nn.Sequential(*self._mlp)
        self.init_params()
        return

    def forward(self, obs, latent, skip_style):
        # skip_style is accepted for interface parity but not used here.
        inputs = [obs, latent]
        input = torch.cat(inputs, dim=-1)
        output = self._mlp(input)
        return output

    def init_params(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                self._initializer(m.weight)
                if getattr(m, "bias", None) is not None:
                    torch.nn.init.zeros_(m.bias)
        return

    def get_out_size(self):
        out_size = self._units[-1]
        return out_size


class AMPStyleCatNet1(torch.nn.Module):
    """MLP whose input is obs concatenated with a 'style' embedding of the
    latent (latent -> style MLP -> tanh-activated style vector)."""

    def __init__(self, obs_size, ase_latent_size, units, activation,
                 style_units, style_dim, initializer):
        super().__init__()

        print('build amp style cat net:', obs_size, ase_latent_size)

        self._activation = activation
        self._initializer = initializer
        self._dense_layers = []
        self._units = units
        self._style_dim = style_dim
        self._style_activation = torch.tanh

        self._style_mlp = self._build_style_mlp(style_units, ase_latent_size)
        self._style_dense = torch.nn.Linear(style_units[-1], style_dim)

        in_size = obs_size + style_dim
        for i in range(len(units)):
            unit = units[i]
            out_size = unit
            curr_dense = torch.nn.Linear(in_size, out_size)
            self._dense_layers.append(curr_dense)

            in_size = out_size

        self._dense_layers = nn.ModuleList(self._dense_layers)

        self.init_params()
        return

    def forward(self, obs, latent, skip_style):
        # When skip_style is set the caller passes an already-embedded style.
        if (skip_style):
            style = latent
        else:
            style = self.eval_style(latent)

        h = torch.cat([obs, style], dim=-1)

        for i in range(len(self._dense_layers)):
            curr_dense = self._dense_layers[i]
            h = curr_dense(h)
            h = self._activation(h)

        return h

    def eval_style(self, latent):
        """Map a latent to its tanh-bounded style vector."""
        style_h = self._style_mlp(latent)
        style = self._style_dense(style_h)
        style = self._style_activation(style)
        return style

    def init_params(self):
        scale_init_range = 1.0

        for m in self.modules():
            if isinstance(m, nn.Linear):
                self._initializer(m.weight)
                if getattr(m, "bias", None) is not None:
                    torch.nn.init.zeros_(m.bias)

        # Style output layer deliberately overridden with a wider uniform init.
        nn.init.uniform_(self._style_dense.weight, -scale_init_range, scale_init_range)
        return

    def get_out_size(self):
        out_size = self._units[-1]
        return out_size

    def _build_style_mlp(self, style_units, input_size):
        in_size = input_size
        layers = []
        for unit in style_units:
            layers.append(torch.nn.Linear(in_size, unit))
            layers.append(self._activation)
            in_size = unit

        enc_mlp = nn.Sequential(*layers)
        return enc_mlp


================================================
FILE: timechamber/ase/ase_players.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2.
Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from pytest import param  # NOTE(review): unused import, likely accidental
import torch
from isaacgym.torch_utils import *

from rl_games.algos_torch import players

from timechamber.ase.utils import amp_players
import timechamber.ase.ase_network_builder as ase_network_builder


class ASEPlayer(amp_players.AMPPlayerContinuous):
    """Inference-time player for an ASE (Adversarial Skill Embedding) policy.

    Extends the AMP player with a per-env latent skill vector that is
    periodically resampled; actions are conditioned on the current latents.
    """

    def __init__(self, params):
        # Latent configuration must be read before super().__init__, which
        # builds the network via _build_net_config (uses self._latent_dim).
        config = params['config']
        self._latent_dim = config['latent_dim']
        # Defaults of np.inf mean "never resample" — only valid if the config
        # supplies finite values, since np.random.randint cannot take inf.
        self._latent_steps_min = config.get('latent_steps_min', np.inf)
        self._latent_steps_max = config.get('latent_steps_max', np.inf)
        self._enc_reward_scale = config['enc_reward_scale']
        super().__init__(params)
        # One latent vector per environment.
        if (hasattr(self, 'env')) and self.env is not None:
            batch_size = self.env.task.num_envs
        else:
            batch_size = self.env_info['num_envs']
        self._ase_latents = torch.zeros((batch_size, self._latent_dim), dtype=torch.float32, device=self.device)
        return

    def run(self):
        # Arm the resample countdown before the base run loop starts.
        self._reset_latent_step_count()
        super().run()
        return

    def get_action(self, obs_dict, is_determenistic=False):
        """Return actions for the current obs, conditioned on the ASE latents.

        Latents are counted down / resampled once per call, so this must be
        invoked exactly once per environment step.
        """
        self._update_latents()
        obs = obs_dict['obs']
        if len(obs.size()) == len(self.obs_shape):
            obs = obs.unsqueeze(0)
        obs = self._preproc_obs(obs)
        ase_latents = self._ase_latents

        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs' : obs,
            'rnn_states' : self.states,
            'ase_latents': ase_latents
        }
        with torch.no_grad():
            res_dict = self.model(input_dict)
        mu = res_dict['mus']
        action = res_dict['actions']
        self.states = res_dict['rnn_states']
        # Deterministic eval uses the mean action instead of a sample.
        if is_determenistic:
            current_action = mu
        else:
            current_action = action

        current_action = torch.squeeze(current_action.detach())
        return players.rescale_actions(self.actions_low, self.actions_high, torch.clamp(current_action, -1.0, 1.0))

    def env_reset(self, env_ids=None):
        # Resample latents for the envs that were reset.
        obs = super().env_reset(env_ids)
        self._reset_latents(env_ids)
        return obs

    def _build_net_config(self):
        config = super()._build_net_config()
        config['ase_latent_shape'] = (self._latent_dim,)
        return config

    def _reset_latents(self, done_env_ids=None):
        """Draw fresh latents for the given envs (all envs when None)."""
        if (done_env_ids is None):
            num_envs = self.env.task.num_envs
            done_env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.device)

        rand_vals = self.model.a2c_network.sample_latents(len(done_env_ids))
        self._ase_latents[done_env_ids] = rand_vals
        self._change_char_color(done_env_ids)
        return

    def _update_latents(self):
        # Countdown; when it hits zero, resample all latents and re-arm.
        if (self._latent_step_count <= 0):
            self._reset_latents()
            self._reset_latent_step_count()
            if (self.env.task.viewer):
                print("Sampling new amp latents------------------------------")
                num_envs = self.env.task.num_envs
                env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.device)
                self._change_char_color(env_ids)
        else:
            self._latent_step_count -= 1
        return

    def _reset_latent_step_count(self):
        # Uniform random horizon in [min, max) before the next resample.
        self._latent_step_count = np.random.randint(self._latent_steps_min, self._latent_steps_max)
        return

    def _calc_amp_rewards(self, amp_obs, ase_latents):
        """Return dict with discriminator and encoder (skill) rewards."""
        disc_r = self._calc_disc_rewards(amp_obs)
        enc_r = self._calc_enc_rewards(amp_obs, ase_latents)
        output = {
            'disc_rewards': disc_r,
            'enc_rewards': enc_r
        }
        return output

    def _calc_enc_rewards(self, amp_obs, ase_latents):
        # Encoder reward: clipped negative encoder error, scaled.
        with torch.no_grad():
            enc_pred = self._eval_enc(amp_obs)
            err = self._calc_enc_error(enc_pred, ase_latents)
            enc_r = torch.clamp_min(-err, 0.0)
            enc_r *= self._enc_reward_scale
        return enc_r

    def _calc_enc_error(self, enc_pred, ase_latent):
        # Negative dot product between encoder prediction and latent.
        err = enc_pred * ase_latent
        err = -torch.sum(err, dim=-1, keepdim=True)
        return err

    def _eval_enc(self, amp_obs):
        proc_amp_obs = self._preproc_amp_obs(amp_obs)
        return self.model.a2c_network.eval_enc(proc_amp_obs)

    def _amp_debug(self, info):
        # Prints discriminator prediction and rewards for env 0 (viewer debug).
        with torch.no_grad():
            amp_obs = info['amp_obs']
            amp_obs = amp_obs
            ase_latents = self._ase_latents
            disc_pred = self._eval_disc(amp_obs)
            amp_rewards = self._calc_amp_rewards(amp_obs, ase_latents)
            disc_reward = amp_rewards['disc_rewards']
            enc_reward = amp_rewards['enc_rewards']

            disc_pred = disc_pred.detach().cpu().numpy()[0, 0]
            disc_reward = disc_reward.cpu().numpy()[0, 0]
            enc_reward = enc_reward.cpu().numpy()[0, 0]
            print("disc_pred: ", disc_pred, disc_reward, enc_reward)
        return

    def _change_char_color(self, env_ids):
        # Random character tint with magnitude matched to range_col's norm,
        # offset from a grey base — purely cosmetic viewer feedback.
        base_col = np.array([0.4, 0.4, 0.4])
        range_col = np.array([0.0706, 0.149, 0.2863])
        range_sum = np.linalg.norm(range_col)

        rand_col = np.random.uniform(0.0, 1.0, size=3)
        rand_col = range_sum * rand_col / np.linalg.norm(rand_col)
        rand_col += base_col
        self.env.task.set_char_color(rand_col, env_ids)
        return



================================================
FILE: timechamber/ase/hrl_agent.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import copy
from datetime import datetime
from distutils.command.config import config  # NOTE(review): unused import, likely auto-added by an IDE
from gym import spaces
import numpy as np
import os
import time
import yaml

from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import central_value
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common import a2c_common
from rl_games.common import datasets
from rl_games.common import schedulers
from rl_games.common import vecenv

import torch
from torch import optim

import timechamber.ase.utils.common_agent as common_agent
import timechamber.ase.ase_agent as ase_agent
import timechamber.ase.ase_models as ase_models
import timechamber.ase.ase_network_builder as ase_network_builder

from tensorboardX import SummaryWriter


class HRLAgent(common_agent.CommonAgent):
    """Hierarchical RL agent: a high-level policy outputs ASE latents that a
    frozen, pre-trained low-level controller (LLC) turns into motor actions.
    """

    def __init__(self, base_name, params):
        config = params['config']

        # The LLC's training config is needed before super().__init__ so the
        # high-level action space can be sized to the latent dimension.
        with open(os.path.join(os.getcwd(), config['llc_config']), 'r') as f:
            llc_config = yaml.load(f, Loader=yaml.SafeLoader)
            llc_config_params = llc_config['params']
            self._latent_dim = llc_config_params['config']['latent_dim']

        super().__init__(base_name, params)

        self._task_size = self.vec_env.env.task.get_task_obs_size()

        # Number of simulator steps executed per high-level decision.
        self._llc_steps = config['llc_steps']
        llc_checkpoint = config['llc_checkpoint']
        assert(llc_checkpoint != "")
        self._build_llc(llc_config_params, llc_checkpoint)

        return

    def env_step(self, actions):
        """Run _llc_steps low-level steps for one high-level action.

        Rewards (task and discriminator) are averaged over the sub-steps;
        done/terminate flags are OR-ed across them.
        """
        actions = self.preprocess_actions(actions)
        obs = self.obs['obs']

        rewards = 0.0
        disc_rewards = 0.0
        done_count = 0.0
        terminate_count = 0.0
        for t in range(self._llc_steps):
            llc_actions = self._compute_llc_action(obs, actions)
            obs_dict, curr_rewards, curr_dones, infos = self.vec_env.step(llc_actions)
            # TODO
            obs = obs_dict['obs']

            rewards += curr_rewards
            done_count += curr_dones
            terminate_count += infos['terminate']

            amp_obs = infos['amp_obs']
            curr_disc_reward = self._calc_disc_reward(amp_obs)
            disc_rewards += curr_disc_reward

        rewards /= self._llc_steps
        disc_rewards /= self._llc_steps

        # An env is done/terminated if it was so in ANY sub-step.
        dones = torch.zeros_like(done_count)
        dones[done_count > 0] = 1.0
        terminate = torch.zeros_like(terminate_count)
        terminate[terminate_count > 0] = 1.0
        infos['terminate'] = terminate
        infos['disc_rewards'] = disc_rewards

        if self.is_tensor_obses:
            if self.value_size == 1:
                rewards = rewards.unsqueeze(1)
            return self.obs_to_tensors(obs), rewards.to(self.ppo_device), dones.to(self.ppo_device), infos
        else:
            if self.value_size == 1:
                rewards = np.expand_dims(rewards, axis=1)
            return self.obs_to_tensors(obs), torch.from_numpy(rewards).to(self.ppo_device).float(), torch.from_numpy(dones).to(self.ppo_device), infos

    def cast_obs(self, obs):
        # Keep the LLC's tensor/ndarray mode in sync with ours.
        obs = super().cast_obs(obs)
        self._llc_agent.is_tensor_obses = self.is_tensor_obses
        return obs

    def preprocess_actions(self, actions):
        """Clamp high-level latents to [-1, 1]; to numpy if env isn't tensor-based."""
        clamped_actions = torch.clamp(actions, -1.0, 1.0)
        if not self.is_tensor_obses:
            clamped_actions = clamped_actions.cpu().numpy()
        return clamped_actions

    def play_steps(self):
        """Collect one horizon of high-level experience and compute returns.

        Mirrors CommonAgent.play_steps but also stores per-step discriminator
        rewards and mixes them into the rewards before GAE.
        """
        self.set_eval()

        epinfos = []
        done_indices = torch.tensor([], device=self.device, dtype=torch.long)
        update_list = self.update_list

        for n in range(self.horizon_length):
            self.obs = self.env_reset(done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])

            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                res_dict = self.get_action_values(self.obs)

            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])

            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])

            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
            shaped_rewards = self.rewards_shaper(rewards)
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)
            self.experience_buffer.update_data('disc_rewards', n, infos['disc_rewards'])

            # Bootstrap value is zeroed on true terminations (not timeouts).
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            next_vals = self._eval_critic(self.obs)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)

            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            done_indices = all_done_indices[::self.num_agents]

            self.game_rewards.update(self.current_rewards[done_indices])
            self.game_lengths.update(self.current_lengths[done_indices])
            self.algo_observer.process_infos(infos, done_indices)

            not_dones = 1.0 - self.dones.float()

            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones

            done_indices = done_indices[:, 0]

        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        mb_disc_rewards = self.experience_buffer.tensor_dict['disc_rewards']
        mb_rewards = self._combine_rewards(mb_rewards, mb_disc_rewards)

        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values

        batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size

        return batch_dict

    def _load_config_params(self, config):
        super()._load_config_params(config)

        # Mixing weights for task vs. discriminator (style) reward.
        self._task_reward_w = config['task_reward_w']
        self._disc_reward_w = config['disc_reward_w']
        return

    def _get_mean_rewards(self):
        # Rewards were averaged over llc_steps in env_step; scale back so
        # reported numbers reflect per-sim-step totals.
        rewards = super()._get_mean_rewards()
        rewards *= self._llc_steps
        return rewards

    def _setup_action_space(self):
        # The high-level policy acts in latent space, not motor space.
        super()._setup_action_space()
        self.actions_num = self._latent_dim
        return

    def init_tensors(self):
        super().init_tensors()

        # Re-shape action/mu/sigma buffers to the latent dimension.
        del self.experience_buffer.tensor_dict['actions']
        del self.experience_buffer.tensor_dict['mus']
        del self.experience_buffer.tensor_dict['sigmas']

        batch_shape = self.experience_buffer.obs_base_shape
        self.experience_buffer.tensor_dict['actions'] = torch.zeros(batch_shape + (self._latent_dim,), dtype=torch.float32, device=self.ppo_device)
        self.experience_buffer.tensor_dict['mus'] = torch.zeros(batch_shape + (self._latent_dim,), dtype=torch.float32, device=self.ppo_device)
        self.experience_buffer.tensor_dict['sigmas'] = torch.zeros(batch_shape + (self._latent_dim,), dtype=torch.float32, device=self.ppo_device)

        self.experience_buffer.tensor_dict['disc_rewards'] = torch.zeros_like(self.experience_buffer.tensor_dict['rewards'])
        self.tensor_list += ['disc_rewards']
        return

    def _build_llc(self, config_params, checkpoint_file):
        """Instantiate the frozen low-level ASE agent from a checkpoint."""
        llc_agent_config = self._build_llc_agent_config(config_params)

        self._llc_agent = ase_agent.ASEAgent('llc', llc_agent_config)
        self._llc_agent.restore(checkpoint_file)
        print("Loaded LLC checkpoint from {:s}".format(checkpoint_file))
        self._llc_agent.set_eval()
        return

    def _build_llc_agent_config(self, config_params, network=None):
        # The LLC observes only the proprioceptive part of the obs: strip the
        # task-specific slice off the end of the observation space.
        llc_env_info = copy.deepcopy(self.env_info)
        obs_space = llc_env_info['observation_space']
        obs_size = obs_space.shape[0]
        obs_size -= self._task_size
        llc_env_info['observation_space'] = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size])

        params = config_params
        params['config']['network'] = network
        params['config']['num_actors'] = self.num_actors
        params['config']['features'] = {'observer' : self.algo_observer}
        params['config']['env_info'] = llc_env_info
        params['config']['device'] = self.device

        return params

    def _compute_llc_action(self, obs, actions):
        """Map a high-level latent to a motor action via the LLC's mean policy."""
        llc_obs = self._extract_llc_obs(obs)
        processed_obs = self._llc_agent._preproc_obs(llc_obs)
        # ASE latents live on the unit hypersphere.
        z = torch.nn.functional.normalize(actions, dim=-1)
        mu, _ = self._llc_agent.model.eval_actor(obs=processed_obs, ase_latents=z)
        llc_action = mu
        llc_action = self._llc_agent.preprocess_actions(llc_action)

        return llc_action

    def _extract_llc_obs(self, obs):
        # Task observations are assumed appended at the end of obs.
        obs_size = obs.shape[-1]
        llc_obs = obs[..., :obs_size - self._task_size]
        return llc_obs

    def _calc_disc_reward(self, amp_obs):
        disc_reward = self._llc_agent._calc_disc_rewards(amp_obs)
        return disc_reward

    def _combine_rewards(self, task_rewards, disc_rewards):
        # NOTE(review): the stray leading '+' on the continuation line is a
        # harmless unary plus; the result is the intended weighted sum.
        combined_rewards = self._task_reward_w * task_rewards + \
                         + self._disc_reward_w * disc_rewards
        #combined_rewards = task_rewards * disc_rewards
        return combined_rewards

    def _record_train_batch_info(self, batch_dict, train_info):
        super()._record_train_batch_info(batch_dict, train_info)
        train_info['disc_rewards'] = batch_dict['disc_rewards']
        return

    def _log_train_info(self, train_info, frame):
        super()._log_train_info(train_info, frame)

        disc_reward_std, disc_reward_mean = torch.std_mean(train_info['disc_rewards'])
        self.writer.add_scalar('info/disc_reward_mean', disc_reward_mean.item(), frame)
        self.writer.add_scalar('info/disc_reward_std', disc_reward_std.item(), frame)
        return



================================================
FILE: timechamber/ase/hrl_models.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2.
Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import torch.nn as nn

from rl_games.algos_torch.models import ModelA2CContinuousLogStd


class ModelHRLContinuous(ModelA2CContinuousLogStd):
    """rl_games model wrapper for the HRL high-level continuous policy."""

    def __init__(self, network):
        super().__init__(network)
        return

    def build(self, config):
        """Build the underlying network and wrap it in our Network class."""
        net = self.network_builder.build('amp', **config)
        # Debug: list all parameter names of the freshly built network.
        for name, _ in net.named_parameters():
            print(name)
        # print(f"ASE config: {config}")
        obs_shape = config['input_shape']
        normalize_value = config.get('normalize_value', False)
        normalize_input = config.get('normalize_input', False)
        value_size = config.get('value_size', 1)
        return ModelHRLContinuous.Network(net, obs_shape=obs_shape,
                                          normalize_value=normalize_value,
                                          normalize_input=normalize_input,
                                          value_size=value_size)

    class Network(ModelA2CContinuousLogStd.Network):
        def __init__(self, a2c_network, obs_shape, normalize_value, normalize_input, value_size):
            super().__init__(a2c_network, obs_shape=obs_shape,
                             normalize_value=normalize_value,
                             normalize_input=normalize_input,
                             value_size=value_size)
            return

        def eval_critic(self, obs):
            """Value estimate for raw obs: normalize in, un-normalize out."""
            processed_obs = self.norm_obs(obs)
            value = self.a2c_network.eval_critic(processed_obs)
            values = self.unnorm_value(value)
            return values



================================================
FILE: timechamber/ase/hrl_network_builder.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3.
# Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from rl_games.algos_torch import network_builder

import torch
import torch.nn as nn

from timechamber.ase import ase_network_builder


class HRLBuilder(network_builder.A2CBuilder):
    """Network builder for the HRL high-level policy: a standard A2C network
    whose action mean is squashed with tanh and whose sigma may be fixed.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        return

    class Network(network_builder.A2CBuilder.Network):
        def __init__(self, params, **kwargs):
            super().__init__(params, **kwargs)

            # When sigma is not learned, replace it with a frozen parameter
            # initialized from the config's sigma_init spec.
            if self.is_continuous:
                if (not self.space_config['learn_sigma']):
                    actions_num = kwargs.get('actions_num')
                    sigma_init = self.init_factory.create(**self.space_config['sigma_init'])
                    self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32),
                                              requires_grad=False)
                    sigma_init(self.sigma)
            return

        def forward(self, obs_dict):
            # Squash the action mean into [-1, 1]; sigma/value pass through.
            mu, sigma, value, states = super().forward(obs_dict)
            norm_mu = torch.tanh(mu)
            return norm_mu, sigma, value, states

        def eval_critic(self, obs):
            """Critic-only forward pass (CNN -> flatten -> MLP -> value head)."""
            c_out = self.critic_cnn(obs)
            c_out = c_out.contiguous().view(c_out.size(0), -1)
            c_out = self.critic_mlp(c_out)
            value = self.value_act(self.value(c_out))
            return value

    def build(self, name, **kwargs):
        net = HRLBuilder.Network(self.params, **kwargs)
        return net



================================================
FILE: timechamber/ase/hrl_players.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import copy from gym import spaces import numpy as np import os import torch import yaml import time from rl_games.algos_torch import players from rl_games.algos_torch import torch_ext from rl_games.algos_torch.running_mean_std import RunningMeanStd from rl_games.common.player import BasePlayer import timechamber.ase.utils.common_player as common_player import timechamber.ase.ase_models as ase_models import timechamber.ase.ase_network_builder as ase_network_builder import timechamber.ase.ase_players as ase_players class HRLPlayer(common_player.CommonPlayer): def __init__(self, params): config = params['config'] with open(os.path.join(os.getcwd(), config['llc_config']), 'r') as f: llc_config = yaml.load(f, Loader=yaml.SafeLoader) llc_config_params = llc_config['params'] self._latent_dim = llc_config_params['config']['latent_dim'] super().__init__(params) self._task_size = self.env.task.get_task_obs_size() self._llc_steps = config['llc_steps'] llc_checkpoint = config['llc_checkpoint'] assert(llc_checkpoint != "") self._build_llc(llc_config_params, llc_checkpoint) return def get_action(self, obs_dict, is_determenistic = False): obs = obs_dict['obs'] if len(obs.size()) == len(self.obs_shape): obs = obs.unsqueeze(0) proc_obs = self._preproc_obs(obs) input_dict = { 'is_train': False, 'prev_actions': None, 'obs' : proc_obs, 'rnn_states' : self.states } with torch.no_grad(): res_dict = self.model(input_dict) mu = res_dict['mus'] action = res_dict['actions'] self.states = res_dict['rnn_states'] if is_determenistic: current_action = mu else: current_action = action current_action = torch.squeeze(current_action.detach()) clamped_actions = torch.clamp(current_action, -1.0, 1.0) return clamped_actions def run(self): n_games = self.games_num render = self.render_env n_game_life = self.n_game_life is_determenistic = self.is_determenistic sum_rewards = 0 sum_steps = 0 sum_game_res = 0 n_games = n_games * n_game_life games_played = 0 has_masks = False has_masks_func = 
getattr(self.env, "has_action_mask", None) is not None op_agent = getattr(self.env, "create_agent", None) if op_agent: agent_inited = True if has_masks_func: has_masks = self.env.has_action_mask() need_init_rnn = self.is_rnn for _ in range(n_games): if games_played >= n_games: break obs_dict = self.env_reset() batch_size = 1 if len(obs_dict['obs'].size()) > len(self.obs_shape): batch_size = obs_dict['obs'].size()[0] self.batch_size = batch_size if need_init_rnn: self.init_rnn() need_init_rnn = False cr = torch.zeros(batch_size, dtype=torch.float32) steps = torch.zeros(batch_size, dtype=torch.float32) print_game_res = False done_indices = [] for n in range(self.max_steps): obs_dict = self.env_reset(done_indices) if has_masks: masks = self.env.get_action_mask() action = self.get_masked_action(obs_dict, masks, is_determenistic) else: action = self.get_action(obs_dict, is_determenistic) obs_dict, r, done, info = self.env_step(self.env, obs_dict, action) cr += r steps += 1 self._post_step(info) if render: self.env.render(mode = 'human') time.sleep(self.render_sleep) all_done_indices = done.nonzero(as_tuple=False) done_indices = all_done_indices[::self.num_agents] done_count = len(done_indices) games_played += done_count if done_count > 0: if self.is_rnn: for s in self.states: s[:,all_done_indices,:] = s[:,all_done_indices,:] * 0.0 cur_rewards = cr[done_indices].sum().item() cur_steps = steps[done_indices].sum().item() cr = cr * (1.0 - done.float()) steps = steps * (1.0 - done.float()) sum_rewards += cur_rewards sum_steps += cur_steps game_res = 0.0 if isinstance(info, dict): if 'battle_won' in info: print_game_res = True game_res = info.get('battle_won', 0.5) if 'scores' in info: print_game_res = True game_res = info.get('scores', 0.5) if self.print_stats: if print_game_res: print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count, 'w:', game_res) else: print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count) sum_game_res += game_res if 
batch_size//self.num_agents == 1 or games_played >= n_games: break done_indices = done_indices[:, 0] print(sum_rewards) if print_game_res: print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life, 'winrate:', sum_game_res / games_played * n_game_life) else: print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life) return def env_step(self, env, obs_dict, action): if not self.is_tensor_obses: actions = actions.cpu().numpy() obs = obs_dict['obs'] rewards = 0.0 done_count = 0.0 disc_rewards = 0.0 for t in range(self._llc_steps): llc_actions = self._compute_llc_action(obs, action) obs, curr_rewards, curr_dones, infos = env.step(llc_actions) rewards += curr_rewards done_count += curr_dones amp_obs = infos['amp_obs'] curr_disc_reward = self._calc_disc_reward(amp_obs) curr_disc_reward = curr_disc_reward[0, 0].cpu().numpy() disc_rewards += curr_disc_reward rewards /= self._llc_steps dones = torch.zeros_like(done_count) dones[done_count > 0] = 1.0 disc_rewards /= self._llc_steps if isinstance(obs, dict): obs = obs['obs'] if obs.dtype == np.float64: obs = np.float32(obs) if self.value_size > 1: rewards = rewards[0] if self.is_tensor_obses: return obs, rewards.cpu(), dones.cpu(), infos else: if np.isscalar(dones): rewards = np.expand_dims(np.asarray(rewards), 0) dones = np.expand_dims(np.asarray(dones), 0) return torch.from_numpy(obs).to(self.device), torch.from_numpy(rewards), torch.from_numpy(dones), infos def _build_llc(self, config_params, checkpoint_file): llc_agent_config = self._build_llc_agent_config(config_params) self._llc_agent = ase_players.ASEPlayer(llc_agent_config) self._llc_agent.restore(checkpoint_file) print("Loaded LLC checkpoint from {:s}".format(checkpoint_file)) return def _build_llc_agent_config(self, config_params, network=None): llc_env_info = copy.deepcopy(self.env_info) obs_space = llc_env_info['observation_space'] obs_size = 
obs_space.shape[0] obs_size -= self._task_size llc_env_info['observation_space'] = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size]) llc_env_info['amp_observation_space'] = self.env.amp_observation_space.shape llc_env_info['num_envs'] = self.env.task.num_envs params = config_params params['config']['network'] = network params['config']['env_info'] = llc_env_info return params def _setup_action_space(self): super()._setup_action_space() self.actions_num = self._latent_dim return def _compute_llc_action(self, obs, actions): llc_obs = self._extract_llc_obs(obs) processed_obs = self._llc_agent._preproc_obs(llc_obs) z = torch.nn.functional.normalize(actions, dim=-1) mu, _ = self._llc_agent.model.eval_actor(obs=processed_obs, ase_latents=z) llc_action = players.rescale_actions(self.actions_low, self.actions_high, torch.clamp(mu, -1.0, 1.0)) return llc_action def _extract_llc_obs(self, obs): obs_size = obs.shape[-1] llc_obs = obs[..., :obs_size - self._task_size] return llc_obs def _calc_disc_reward(self, amp_obs): disc_reward = self._llc_agent._calc_disc_rewards(amp_obs) return disc_reward ================================================ FILE: timechamber/ase/utils/amp_agent.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. 
# Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.algos_torch import torch_ext
from rl_games.common import a2c_common
from rl_games.common import schedulers
from rl_games.common import vecenv

from isaacgym.torch_utils import *

import time
from datetime import datetime
import numpy as np
from torch import optim
import torch
from torch import nn

import timechamber.ase.utils.replay_buffer as replay_buffer
import timechamber.ase.utils.common_agent as common_agent

from tensorboardX import SummaryWriter


class AMPAgent(common_agent.CommonAgent):
    """Adversarial Motion Prior (AMP) PPO agent.

    Extends the common PPO agent with:
      * a discriminator trained to separate policy-generated AMP
        observations from reference-motion demos,
      * a style reward derived from the discriminator that is mixed
        with the task reward,
      * replay/demo buffers for discriminator training, and
      * an epsilon-greedy-style mixture of deterministic and stochastic
        rollout actions (see `_load_config_params`).
    """

    def __init__(self, base_name, params):
        super().__init__(base_name, params)

        # Running normalizer for discriminator inputs (shared by rollout
        # reward computation and discriminator training).
        if self._normalize_amp_input:
            self._amp_input_mean_std = RunningMeanStd(self._amp_observation_space.shape).to(self.ppo_device)
        return

    def init_tensors(self):
        """Allocate experience-buffer tensors, including AMP-specific ones."""
        super().init_tensors()
        self._build_amp_buffers()
        return

    def set_eval(self):
        # Freeze AMP input normalizer statistics during rollout collection.
        super().set_eval()
        if self._normalize_amp_input:
            self._amp_input_mean_std.eval()
        return

    def set_train(self):
        # Resume updating AMP input normalizer statistics during training.
        super().set_train()
        if self._normalize_amp_input:
            self._amp_input_mean_std.train()
        return

    def get_stats_weights(self):
        """Include AMP input-normalizer state in checkpoint stats."""
        state = super().get_stats_weights()
        if self._normalize_amp_input:
            state['amp_input_mean_std'] = self._amp_input_mean_std.state_dict()
        return state

    def set_stats_weights(self, weights):
        """Restore AMP input-normalizer state from checkpoint stats."""
        super().set_stats_weights(weights)
        if self._normalize_amp_input:
            self._amp_input_mean_std.load_state_dict(weights['amp_input_mean_std'])
        return

    def play_steps(self):
        """Collect one horizon of experience and assemble the training batch.

        In addition to the standard PPO rollout, records per-step AMP
        observations and the random-action mask, then mixes the
        discriminator (style) reward into the task reward before
        computing advantages/returns.
        """
        self.set_eval()

        epinfos = []  # NOTE(review): never populated/used below.
        done_indices = []
        update_list = self.update_list

        for n in range(self.horizon_length):
            self.obs = self.env_reset(done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])

            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                # Per-env probability of acting stochastically vs deterministically.
                res_dict = self.get_action_values(self.obs, self._rand_action_probs)

            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])

            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])

            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
            shaped_rewards = self.rewards_shaper(rewards)
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)
            self.experience_buffer.update_data('amp_obs', n, infos['amp_obs'])
            self.experience_buffer.update_data('rand_action_mask', n, res_dict['rand_action_mask'])

            # Bootstrap value is zeroed on true terminations (not timeouts).
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            next_vals = self._eval_critic(self.obs)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)

            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            done_indices = all_done_indices[::self.num_agents]

            self.game_rewards.update(self.current_rewards[done_indices])
            self.game_lengths.update(self.current_lengths[done_indices])
            self.algo_observer.process_infos(infos, done_indices)

            not_dones = 1.0 - self.dones.float()

            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones

            if (self.vec_env.env.task.viewer):
                self._amp_debug(infos)

            done_indices = done_indices[:, 0]

        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']

        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        mb_amp_obs = self.experience_buffer.tensor_dict['amp_obs']
        # Style reward from the discriminator, blended with the task reward.
        amp_rewards = self._calc_amp_rewards(mb_amp_obs)
        mb_rewards = self._combine_rewards(mb_rewards, amp_rewards)

        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values

        batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size

        # Expose per-component AMP rewards (e.g. 'disc_rewards') for logging.
        for k, v in amp_rewards.items():
            batch_dict[k] = a2c_common.swap_and_flatten01(v)

        return batch_dict

    def get_action_values(self, obs_dict, rand_action_probs):
        """Query the policy, then overwrite actions with the deterministic
        mean for envs whose Bernoulli draw selected deterministic mode.

        rand_action_probs: per-env probability of keeping the sampled
        (stochastic) action; the resulting 0/1 mask is returned as
        'rand_action_mask' and later gates the PPO losses.
        """
        processed_obs = self._preproc_obs(obs_dict['obs'])

        self.model.eval()
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs' : processed_obs,
            'rnn_states' : self.rnn_states
        }

        with torch.no_grad():
            res_dict = self.model(input_dict)
            if self.has_central_value:
                states = obs_dict['states']
                input_dict = {
                    'is_train': False,
                    'states' : states,
                }
                value = self.get_central_value(input_dict)
                res_dict['values'] = value

        if self.normalize_value:
            res_dict['values'] = self.value_mean_std(res_dict['values'], True)

        rand_action_mask = torch.bernoulli(rand_action_probs)
        det_action_mask = rand_action_mask == 0.0
        # Deterministic envs act with the distribution mean.
        res_dict['actions'][det_action_mask] = res_dict['mus'][det_action_mask]
        res_dict['rand_action_mask'] = rand_action_mask

        return res_dict

    def prepare_dataset(self, batch_dict):
        """Forward AMP tensors (agent/demo/replay obs, action mask) to the dataset."""
        super().prepare_dataset(batch_dict)
        self.dataset.values_dict['amp_obs'] = batch_dict['amp_obs']
        self.dataset.values_dict['amp_obs_demo'] = batch_dict['amp_obs_demo']
        self.dataset.values_dict['amp_obs_replay'] = batch_dict['amp_obs_replay']

        rand_action_mask = batch_dict['rand_action_mask']
        self.dataset.values_dict['rand_action_mask'] = rand_action_mask
        return

    def train_epoch(self):
        """One training epoch: rollout, refresh demo/replay AMP batches,
        run mini-epoch PPO+discriminator updates, then push this epoch's
        agent AMP observations into the replay buffer.
        """
        play_time_start = time.time()

        with torch.no_grad():
            if self.is_rnn:
                batch_dict = self.play_steps_rnn()
            else:
                batch_dict = self.play_steps()

        play_time_end = time.time()
        update_time_start = time.time()
        rnn_masks = batch_dict.get('rnn_masks', None)

        self._update_amp_demos()
        num_obs_samples = batch_dict['amp_obs'].shape[0]
        amp_obs_demo = self._amp_obs_demo_buffer.sample(num_obs_samples)['amp_obs']
        batch_dict['amp_obs_demo'] = amp_obs_demo

        # Before the replay buffer has any content, fall back to the
        # freshly collected agent observations.
        if (self._amp_replay_buffer.get_total_count() == 0):
            batch_dict['amp_obs_replay'] = batch_dict['amp_obs']
        else:
            batch_dict['amp_obs_replay'] = self._amp_replay_buffer.sample(num_obs_samples)['amp_obs']

        self.set_train()

        self.curr_frames = batch_dict.pop('played_frames')
        self.prepare_dataset(batch_dict)
        self.algo_observer.after_steps()

        if self.has_central_value:
            self.train_central_value()

        train_info = None

        if self.is_rnn:
            frames_mask_ratio = rnn_masks.sum().item() / (rnn_masks.nelement())
            print(frames_mask_ratio)

        for _ in range(0, self.mini_epochs_num):
            ep_kls = []  # NOTE(review): collected nowhere; unused.
            for i in range(len(self.dataset)):
                curr_train_info = self.train_actor_critic(self.dataset[i])

                if self.schedule_type == 'legacy':
                    if self.multi_gpu:
                        curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls')
                    self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, curr_train_info['kl'].item())
                    self.update_lr(self.last_lr)

                # Accumulate per-minibatch metrics into lists per key.
                if (train_info is None):
                    train_info = dict()
                    for k, v in curr_train_info.items():
                        train_info[k] = [v]
                else:
                    for k, v in curr_train_info.items():
                        train_info[k].append(v)

            av_kls = torch_ext.mean_list(train_info['kl'])

            if self.schedule_type == 'standard':
                if self.multi_gpu:
                    av_kls = self.hvd.average_value(av_kls, 'ep_kls')
                self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item())
                self.update_lr(self.last_lr)

        if self.schedule_type == 'standard_epoch':
            if self.multi_gpu:
                # NOTE(review): `kls` is undefined here — this branch raises
                # NameError if schedule_type == 'standard_epoch' with multi_gpu.
                av_kls = self.hvd.average_value(torch_ext.mean_list(kls), 'ep_kls')
            self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item())
            self.update_lr(self.last_lr)

        update_time_end = time.time()
        play_time = play_time_end - play_time_start
        update_time = update_time_end - update_time_start
        total_time = update_time_end - play_time_start

        self._store_replay_amp_obs(batch_dict['amp_obs'])

        train_info['play_time'] = play_time
        train_info['update_time'] = update_time
        train_info['total_time'] = total_time
        self._record_train_batch_info(batch_dict, train_info)

        return train_info

    def calc_gradients(self, input_dict):
        """Compute the combined PPO + discriminator loss and step the optimizer.

        Actor/entropy/bound losses are masked by 'rand_action_mask' so
        only stochastically acted samples contribute; the critic loss
        uses all samples. Results are stored in self.train_result.
        """
        self.set_train()

        value_preds_batch = input_dict['old_values']
        old_action_log_probs_batch = input_dict['old_logp_actions']
        advantage = input_dict['advantages']
        old_mu_batch = input_dict['mu']
        old_sigma_batch = input_dict['sigma']
        return_batch = input_dict['returns']
        actions_batch = input_dict['actions']
        obs_batch = input_dict['obs']
        obs_batch = self._preproc_obs(obs_batch)

        # Discriminator minibatches are a (possibly smaller) prefix slice.
        amp_obs = input_dict['amp_obs'][0:self._amp_minibatch_size]
        amp_obs = self._preproc_amp_obs(amp_obs)
        amp_obs_replay = input_dict['amp_obs_replay'][0:self._amp_minibatch_size]
        amp_obs_replay = self._preproc_amp_obs(amp_obs_replay)

        amp_obs_demo = input_dict['amp_obs_demo'][0:self._amp_minibatch_size]
        amp_obs_demo = self._preproc_amp_obs(amp_obs_demo)
        # Needed for the gradient penalty in _disc_loss.
        amp_obs_demo.requires_grad_(True)

        rand_action_mask = input_dict['rand_action_mask']
        rand_action_sum = torch.sum(rand_action_mask)

        lr = self.last_lr  # NOTE(review): unused local.
        kl = 1.0           # NOTE(review): unused local.
        lr_mul = 1.0
        curr_e_clip = lr_mul * self.e_clip

        batch_dict = {
            'is_train': True,
            'prev_actions': actions_batch,
            'obs' : obs_batch,
            'amp_obs' : amp_obs,
            'amp_obs_replay' : amp_obs_replay,
            'amp_obs_demo' : amp_obs_demo
        }

        rnn_masks = None
        if self.is_rnn:
            rnn_masks = input_dict['rnn_masks']
            batch_dict['rnn_states'] = input_dict['rnn_states']
            batch_dict['seq_length'] = self.seq_len

        with torch.cuda.amp.autocast(enabled=self.mixed_precision):
            res_dict = self.model(batch_dict)
            action_log_probs = res_dict['prev_neglogp']
            values = res_dict['values']
            entropy = res_dict['entropy']
            mu = res_dict['mus']
            sigma = res_dict['sigmas']
            disc_agent_logit = res_dict['disc_agent_logit']
            disc_agent_replay_logit = res_dict['disc_agent_replay_logit']
            disc_demo_logit = res_dict['disc_demo_logit']

            a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip)
            a_loss = a_info['actor_loss']
            a_clipped = a_info['actor_clipped'].float()

            c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value)
            c_loss = c_info['critic_loss']

            b_loss = self.bound_loss(mu)

            # Policy-side terms averaged only over stochastically acted samples.
            c_loss = torch.mean(c_loss)
            a_loss = torch.sum(rand_action_mask * a_loss) / rand_action_sum
            entropy = torch.sum(rand_action_mask * entropy) / rand_action_sum
            b_loss = torch.sum(rand_action_mask * b_loss) / rand_action_sum
            a_clip_frac = torch.sum(rand_action_mask * a_clipped) / rand_action_sum

            # Negatives for the discriminator: fresh agent obs + replayed obs.
            disc_agent_cat_logit = torch.cat([disc_agent_logit, disc_agent_replay_logit], dim=0)
            disc_info = self._disc_loss(disc_agent_cat_logit, disc_demo_logit, amp_obs_demo)
            disc_loss = disc_info['disc_loss']

            loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss \
                 + self._disc_coef * disc_loss

            a_info['actor_loss'] = a_loss
            a_info['actor_clip_frac'] = a_clip_frac
            c_info['critic_loss'] = c_loss

            if self.multi_gpu:
                self.optimizer.zero_grad()
            else:
                for param in self.model.parameters():
                    param.grad = None

        self.scaler.scale(loss).backward()

        #TODO: Refactor this ugliest code of the year
        if self.truncate_grads:
            if self.multi_gpu:
                self.optimizer.synchronize()
                self.scaler.unscale_(self.optimizer)
                nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
                with self.optimizer.skip_synchronize():
                    self.scaler.step(self.optimizer)
                    self.scaler.update()
            else:
                self.scaler.unscale_(self.optimizer)
                nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
                self.scaler.step(self.optimizer)
                self.scaler.update()
        else:
            self.scaler.step(self.optimizer)
            self.scaler.update()

        with torch.no_grad():
            reduce_kl = not self.is_rnn
            kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl)
            if self.is_rnn:
                kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel()  #/ sum_mask

        self.train_result = {
            'entropy': entropy,
            'kl': kl_dist,
            'last_lr': self.last_lr,
            'lr_mul': lr_mul,
            'b_loss': b_loss
        }
        self.train_result.update(a_info)
        self.train_result.update(c_info)
        self.train_result.update(disc_info)

        return

    def _load_config_params(self, config):
        """Read AMP-specific hyperparameters from the train config."""
        super()._load_config_params(config)

        # When eps greedy is enabled, rollouts will be generated using a mixture of
        # deterministic and stochastic actions. The deterministic actions help to
        # produce smoother, less noisy, motions that can be used to train a better
        # discriminator. If the discriminator is only trained with jittery motions
        # from noisy actions, it can learn to home in on the jitteriness to
        # differentiate between real and fake samples.
        self._enable_eps_greedy = bool(config['enable_eps_greedy'])
        self._task_reward_w = config['task_reward_w']
        self._disc_reward_w = config['disc_reward_w']

        self._amp_observation_space = self.env_info['amp_observation_space']
        self._amp_batch_size = int(config['amp_batch_size'])
        self._amp_minibatch_size = int(config['amp_minibatch_size'])
        assert(self._amp_minibatch_size <= self.minibatch_size)

        self._disc_coef = config['disc_coef']
        self._disc_logit_reg = config['disc_logit_reg']
        self._disc_grad_penalty = config['disc_grad_penalty']
        self._disc_weight_decay = config['disc_weight_decay']
        self._disc_reward_scale = config['disc_reward_scale']
        self._normalize_amp_input = config.get('normalize_amp_input', True)
        return

    def _build_net_config(self):
        """Extend the network config with the discriminator input shape."""
        config = super()._build_net_config()
        config['amp_input_shape'] = self._amp_observation_space.shape
        return config

    def _build_rand_action_probs(self):
        """Assign each env a probability of acting stochastically.

        Probabilities decay exponentially with env index: env 0 is fully
        stochastic (1.0), the last env fully deterministic (0.0). With
        eps-greedy disabled, all envs act stochastically.
        """
        num_envs = self.vec_env.env.task.num_envs
        env_ids = to_torch(np.arange(num_envs), dtype=torch.float32, device=self.ppo_device)

        self._rand_action_probs = 1.0 - torch.exp(10 * (env_ids / (num_envs - 1.0) - 1.0))
        self._rand_action_probs[0] = 1.0
        self._rand_action_probs[-1] = 0.0

        if not self._enable_eps_greedy:
            self._rand_action_probs[:] = 1.0

        return

    def _init_train(self):
        super()._init_train()
        self._init_amp_demo_buf()
        return

    def _disc_loss(self, disc_agent_logit, disc_demo_logit, obs_demo):
        """Discriminator loss: BCE prediction terms plus logit
        regularization, gradient penalty on demos, and optional weight decay.

        obs_demo must have requires_grad=True (set by the caller) for the
        gradient-penalty term.
        """
        # prediction loss
        disc_loss_agent = self._disc_loss_neg(disc_agent_logit)
        disc_loss_demo = self._disc_loss_pos(disc_demo_logit)
        disc_loss = 0.5 * (disc_loss_agent + disc_loss_demo)

        # logit reg
        logit_weights = self.model.a2c_network.get_disc_logit_weights()
        disc_logit_loss = torch.sum(torch.square(logit_weights))
        disc_loss += self._disc_logit_reg * disc_logit_loss

        # grad penalty
        disc_demo_grad = torch.autograd.grad(disc_demo_logit, obs_demo, grad_outputs=torch.ones_like(disc_demo_logit),
                                             create_graph=True, retain_graph=True, only_inputs=True)
        disc_demo_grad = disc_demo_grad[0]
        disc_demo_grad = torch.sum(torch.square(disc_demo_grad), dim=-1)
        disc_grad_penalty = torch.mean(disc_demo_grad)
        disc_loss += self._disc_grad_penalty * disc_grad_penalty

        # weight decay
        if (self._disc_weight_decay != 0):
            disc_weights = self.model.a2c_network.get_disc_weights()
            disc_weights = torch.cat(disc_weights, dim=-1)
            disc_weight_decay = torch.sum(torch.square(disc_weights))
            disc_loss += self._disc_weight_decay * disc_weight_decay

        disc_agent_acc, disc_demo_acc = self._compute_disc_acc(disc_agent_logit, disc_demo_logit)

        disc_info = {
            'disc_loss': disc_loss,
            'disc_grad_penalty': disc_grad_penalty.detach(),
            'disc_logit_loss': disc_logit_loss.detach(),
            'disc_agent_acc': disc_agent_acc.detach(),
            'disc_demo_acc': disc_demo_acc.detach(),
            'disc_agent_logit': disc_agent_logit.detach(),
            'disc_demo_logit': disc_demo_logit.detach()
        }
        return disc_info

    def _disc_loss_neg(self, disc_logits):
        # Agent samples should be classified as fake (target 0).
        bce = torch.nn.BCEWithLogitsLoss()
        loss = bce(disc_logits, torch.zeros_like(disc_logits))
        return loss

    def _disc_loss_pos(self, disc_logits):
        # Demo samples should be classified as real (target 1).
        bce = torch.nn.BCEWithLogitsLoss()
        loss = bce(disc_logits, torch.ones_like(disc_logits))
        return loss

    def _compute_disc_acc(self, disc_agent_logit, disc_demo_logit):
        """Classification accuracy at the logit-0 decision boundary."""
        agent_acc = disc_agent_logit < 0
        agent_acc = torch.mean(agent_acc.float())
        demo_acc = disc_demo_logit > 0
        demo_acc = torch.mean(demo_acc.float())
        return agent_acc, demo_acc

    def _fetch_amp_obs_demo(self, num_samples):
        """Sample reference-motion AMP observations from the environment."""
        amp_obs_demo = self.vec_env.env.fetch_amp_obs_demo(num_samples)
        return amp_obs_demo

    def _build_amp_buffers(self):
        """Allocate AMP experience tensors and the demo/replay buffers."""
        batch_shape = self.experience_buffer.obs_base_shape
        self.experience_buffer.tensor_dict['amp_obs'] = torch.zeros(batch_shape + self._amp_observation_space.shape,
                                                                    device=self.ppo_device)
        self.experience_buffer.tensor_dict['rand_action_mask'] = torch.zeros(batch_shape, dtype=torch.float32,
                                                                             device=self.ppo_device)

        amp_obs_demo_buffer_size = int(self.config['amp_obs_demo_buffer_size'])
        self._amp_obs_demo_buffer = replay_buffer.ReplayBuffer(amp_obs_demo_buffer_size, self.ppo_device)

        self._amp_replay_keep_prob = self.config['amp_replay_keep_prob']
        replay_buffer_size = int(self.config['amp_replay_buffer_size'])
        self._amp_replay_buffer = replay_buffer.ReplayBuffer(replay_buffer_size, self.ppo_device)
        self._build_rand_action_probs()

        self.tensor_list += ['amp_obs', 'rand_action_mask']
        return

    def _init_amp_demo_buf(self):
        """Pre-fill the demo buffer with reference-motion observations."""
        buffer_size = self._amp_obs_demo_buffer.get_buffer_size()
        num_batches = int(np.ceil(buffer_size / self._amp_batch_size))

        for i in range(num_batches):
            curr_samples = self._fetch_amp_obs_demo(self._amp_batch_size)
            self._amp_obs_demo_buffer.store({'amp_obs': curr_samples})

        return

    def _update_amp_demos(self):
        """Push one fresh batch of demo observations each epoch."""
        new_amp_obs_demo = self._fetch_amp_obs_demo(self._amp_batch_size)
        self._amp_obs_demo_buffer.store({'amp_obs': new_amp_obs_demo})
        return

    def _preproc_amp_obs(self, amp_obs):
        # Apply the running mean/std normalizer when enabled.
        if self._normalize_amp_input:
            amp_obs = self._amp_input_mean_std(amp_obs)
        return amp_obs

    def _combine_rewards(self, task_rewards, amp_rewards):
        """Weighted sum of task reward and discriminator style reward."""
        disc_r = amp_rewards['disc_rewards']
        # NOTE(review): the '+ \ +' below is a stray unary plus (harmless,
        # result unchanged) left over from a reformat.
        combined_rewards = self._task_reward_w * task_rewards + \
            + self._disc_reward_w * disc_r
        return combined_rewards

    def _eval_disc(self, amp_obs):
        """Discriminator logits for (normalized) AMP observations."""
        proc_amp_obs = self._preproc_amp_obs(amp_obs)
        return self.model.a2c_network.eval_disc(proc_amp_obs)

    def _calc_advs(self, batch_dict):
        """Advantages, optionally normalized only over stochastic samples."""
        returns = batch_dict['returns']
        values = batch_dict['values']
        rand_action_mask = batch_dict['rand_action_mask']

        advantages = returns - values
        advantages = torch.sum(advantages, axis=1)
        if self.normalize_advantage:
            advantages = torch_ext.normalization_with_masks(advantages, rand_action_mask)

        return advantages

    def _calc_amp_rewards(self, amp_obs):
        disc_r = self._calc_disc_rewards(amp_obs)
        output = {
            'disc_rewards': disc_r
        }
        return output

    def _calc_disc_rewards(self, amp_obs):
        """Style reward r = -log(max(1 - sigmoid(logit), 1e-4)) * scale."""
        with torch.no_grad():
            disc_logits = self._eval_disc(amp_obs)
            prob = 1 / (1 + torch.exp(-disc_logits))
            # Clamp avoids log(0) when the discriminator is saturated.
            disc_r = -torch.log(torch.maximum(1 - prob, torch.tensor(0.0001, device=self.ppo_device)))
            disc_r *= self._disc_reward_scale
        return disc_r

    def _store_replay_amp_obs(self, amp_obs):
        """Insert agent AMP observations into the replay buffer.

        Once the buffer has wrapped, each sample is kept only with
        probability `_amp_replay_keep_prob`; an oversized batch is then
        randomly subsampled to the buffer capacity.
        """
        buf_size = self._amp_replay_buffer.get_buffer_size()
        buf_total_count = self._amp_replay_buffer.get_total_count()
        if (buf_total_count > buf_size):
            keep_probs = to_torch(np.array([self._amp_replay_keep_prob] * amp_obs.shape[0]), device=self.ppo_device)
            keep_mask = torch.bernoulli(keep_probs) == 1.0
            amp_obs = amp_obs[keep_mask]

        if (amp_obs.shape[0] > buf_size):
            rand_idx = torch.randperm(amp_obs.shape[0])
            rand_idx = rand_idx[:buf_size]
            amp_obs = amp_obs[rand_idx]

        self._amp_replay_buffer.store({'amp_obs': amp_obs})
        return

    def _record_train_batch_info(self, batch_dict, train_info):
        super()._record_train_batch_info(batch_dict, train_info)
        train_info['disc_rewards'] = batch_dict['disc_rewards']
        return

    def _log_train_info(self, train_info, frame):
        """Log discriminator losses/metrics to TensorBoard."""
        super()._log_train_info(train_info, frame)

        self.writer.add_scalar('losses/disc_loss', torch_ext.mean_list(train_info['disc_loss']).item(), frame)

        self.writer.add_scalar('info/disc_agent_acc', torch_ext.mean_list(train_info['disc_agent_acc']).item(), frame)
        self.writer.add_scalar('info/disc_demo_acc', torch_ext.mean_list(train_info['disc_demo_acc']).item(), frame)
        self.writer.add_scalar('info/disc_agent_logit', torch_ext.mean_list(train_info['disc_agent_logit']).item(), frame)
        self.writer.add_scalar('info/disc_demo_logit', torch_ext.mean_list(train_info['disc_demo_logit']).item(), frame)
        self.writer.add_scalar('info/disc_grad_penalty', torch_ext.mean_list(train_info['disc_grad_penalty']).item(), frame)
        self.writer.add_scalar('info/disc_logit_loss', torch_ext.mean_list(train_info['disc_logit_loss']).item(), frame)

        disc_reward_std, disc_reward_mean = torch.std_mean(train_info['disc_rewards'])
        self.writer.add_scalar('info/disc_reward_mean', disc_reward_mean.item(), frame)
        self.writer.add_scalar('info/disc_reward_std', disc_reward_std.item(), frame)
        return

    def _amp_debug(self, info):
        """Print discriminator prediction/reward for env 0 (viewer debugging)."""
        with torch.no_grad():
            amp_obs = info['amp_obs']
            amp_obs = amp_obs[0:1]
            disc_pred = self._eval_disc(amp_obs)
            amp_rewards = self._calc_amp_rewards(amp_obs)
            disc_reward = amp_rewards['disc_rewards']

            disc_pred = disc_pred.detach().cpu().numpy()[0, 0]
            disc_reward = disc_reward.cpu().numpy()[0, 0]
            print("disc_pred: ", disc_pred, disc_reward)
        return


================================================
FILE: timechamber/ase/utils/amp_datasets.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch from rl_games.common import datasets class AMPDataset(datasets.PPODataset): def __init__(self, batch_size, minibatch_size, is_discrete, is_rnn, device, seq_len): super().__init__(batch_size, minibatch_size, is_discrete, is_rnn, device, seq_len) self._idx_buf = torch.randperm(batch_size) return def update_mu_sigma(self, mu, sigma): raise NotImplementedError() return def _get_item(self, idx): start = idx * self.minibatch_size end = (idx + 1) * self.minibatch_size sample_idx = self._idx_buf[start:end] input_dict = {} for k,v in self.values_dict.items(): if k not in self.special_names and v is not None: input_dict[k] = v[sample_idx] if (end >= self.batch_size): self._shuffle_idx_buf() return input_dict def _shuffle_idx_buf(self): self._idx_buf[:] = torch.randperm(self.batch_size) return ================================================ FILE: timechamber/ase/utils/amp_models.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import torch.nn as nn from rl_games.algos_torch.models import ModelA2CContinuousLogStd class ModelAMPContinuous(ModelA2CContinuousLogStd): def __init__(self, network): super().__init__(network) return def build(self, config): net = self.network_builder.build('amp', **config) for name, _ in net.named_parameters(): print(name) # print(f"AMP config: {config}") obs_shape = config['input_shape'] normalize_value = config.get('normalize_value', False) normalize_input = config.get('normalize_input', False) value_size = config.get('value_size', 1) return ModelAMPContinuous.Network(net, obs_shape=obs_shape, normalize_value=normalize_value, normalize_input=normalize_input, value_size=value_size) class Network(ModelA2CContinuousLogStd.Network): def __init__(self, a2c_network, obs_shape, normalize_value, normalize_input, value_size): super().__init__(a2c_network, obs_shape=obs_shape, normalize_value=normalize_value, normalize_input=normalize_input, value_size=value_size) return def forward(self, input_dict): is_train = input_dict.get('is_train', True) result = super().forward(input_dict) if (is_train): amp_obs = input_dict['amp_obs'] disc_agent_logit = self.a2c_network.eval_disc(amp_obs) result["disc_agent_logit"] = disc_agent_logit amp_obs_replay = input_dict['amp_obs_replay'] disc_agent_replay_logit = self.a2c_network.eval_disc(amp_obs_replay) result["disc_agent_replay_logit"] = disc_agent_replay_logit amp_demo_obs = input_dict['amp_obs_demo'] disc_demo_logit 
= self.a2c_network.eval_disc(amp_demo_obs) result["disc_demo_logit"] = disc_demo_logit return result def eval_actor(self, obs): processed_obs = self.norm_obs(obs) mu, sigma = self.a2c_network.eval_actor(obs=processed_obs) return mu, sigma def eval_critic(self, obs): processed_obs = self.norm_obs(obs) value = self.a2c_network.eval_critic(processed_obs) return value ================================================ FILE: timechamber/ase/utils/amp_network_builder.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import layers
from rl_games.algos_torch import network_builder

import torch
import torch.nn as nn
import numpy as np

# Range of the uniform init for the discriminator output layer weights.
DISC_LOGIT_INIT_SCALE = 1.0


class AMPBuilder(network_builder.A2CBuilder):
    """A2C network builder extended with an AMP discriminator MLP head."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        return

    class Network(network_builder.A2CBuilder.Network):
        def __init__(self, params, **kwargs):
            super().__init__(params, **kwargs)

            # With learn_sigma disabled, replace sigma with a frozen
            # (non-trainable) parameter initialized from config.
            if self.is_continuous:
                if (not self.space_config['learn_sigma']):
                    actions_num = kwargs.get('actions_num')
                    sigma_init = self.init_factory.create(**self.space_config['sigma_init'])
                    self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32),
                                              requires_grad=False)
                    sigma_init(self.sigma)

            amp_input_shape = kwargs.get('amp_input_shape')
            self._build_disc(amp_input_shape)

            return

        def load(self, params):
            """Read discriminator architecture settings from params."""
            super().load(params)

            self._disc_units = params['disc']['units']
            self._disc_activation = params['disc']['activation']
            self._disc_initializer = params['disc']['initializer']
            return

        def forward(self, obs_dict):
            """Return (actor outputs..., value, rnn_states)."""
            obs = obs_dict['obs']
            states = obs_dict.get('rnn_states', None)

            actor_outputs = self.eval_actor(obs)
            value = self.eval_critic(obs)

            output = actor_outputs + (value, states)

            return output

        def eval_actor(self, obs):
            """Actor head: logits (discrete) or (mu, sigma) (continuous)."""
            a_out = self.actor_cnn(obs)
            a_out = a_out.contiguous().view(a_out.size(0), -1)
            a_out = self.actor_mlp(a_out)

            if self.is_discrete:
                logits = self.logits(a_out)
                return logits

            if self.is_multi_discrete:
                logits = [logit(a_out) for logit in self.logits]
                return logits

            if self.is_continuous:
                mu = self.mu_act(self.mu(a_out))
                if self.space_config['fixed_sigma']:
                    # Broadcast the shared sigma parameter to mu's batch shape.
                    sigma = mu * 0.0 + self.sigma_act(self.sigma)
                else:
                    sigma = self.sigma_act(self.sigma(a_out))

                return mu, sigma
            return

        def eval_critic(self, obs):
            """Critic head: state value."""
            c_out = self.critic_cnn(obs)
            c_out = c_out.contiguous().view(c_out.size(0), -1)
            c_out = self.critic_mlp(c_out)
            value = self.value_act(self.value(c_out))
            return value

        def eval_disc(self, amp_obs):
            """Discriminator head: raw logits for AMP observations."""
            disc_mlp_out = self._disc_mlp(amp_obs)
            disc_logits = self._disc_logits(disc_mlp_out)
            return disc_logits

        def get_disc_logit_weights(self):
            # Output-layer weights only (used for logit regularization).
            return torch.flatten(self._disc_logits.weight)

        def get_disc_weights(self):
            """All discriminator Linear weights (for weight decay)."""
            weights = []
            for m in self._disc_mlp.modules():
                if isinstance(m, nn.Linear):
                    weights.append(torch.flatten(m.weight))

            weights.append(torch.flatten(self._disc_logits.weight))
            return weights

        def _build_disc(self, input_shape):
            """Construct the discriminator MLP + 1-logit output layer."""
            # NOTE(review): dead assignment — immediately overwritten by
            # _build_mlp below.
            self._disc_mlp = nn.Sequential()

            mlp_args = {
                'input_size' : input_shape[0],
                'units' : self._disc_units,
                'activation' : self._disc_activation,
                'dense_func' : torch.nn.Linear
            }
            self._disc_mlp = self._build_mlp(**mlp_args)

            mlp_out_size = self._disc_units[-1]
            self._disc_logits = torch.nn.Linear(mlp_out_size, 1)

            mlp_init = self.init_factory.create(**self._disc_initializer)
            for m in self._disc_mlp.modules():
                if isinstance(m, nn.Linear):
                    mlp_init(m.weight)
                    if getattr(m, "bias", None) is not None:
                        torch.nn.init.zeros_(m.bias)

            torch.nn.init.uniform_(self._disc_logits.weight, -DISC_LOGIT_INIT_SCALE, DISC_LOGIT_INIT_SCALE)
            torch.nn.init.zeros_(self._disc_logits.bias)

            return

    def build(self, name, **kwargs):
        net = AMPBuilder.Network(self.params, **kwargs)
        return net


================================================
FILE: timechamber/ase/utils/amp_players.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import torch

from rl_games.algos_torch import torch_ext
from rl_games.algos_torch.running_mean_std import RunningMeanStd

import timechamber.ase.utils.common_player as common_player


class AMPPlayerContinuous(common_player.CommonPlayer):
    """Inference-time player for AMP policies.

    Adds discriminator evaluation, AMP-observation normalization, and
    style-reward computation on top of the generic CommonPlayer.
    """

    def __init__(self, params):
        config = params['config']

        # Whether AMP observations go through a running mean/std before the discriminator.
        self._normalize_amp_input = config.get('normalize_amp_input', True)
        self._disc_reward_scale = config['disc_reward_scale']

        super().__init__(params)
        return

    def restore(self, fn):
        # 'Base' means "no checkpoint": keep the freshly-built network.
        if (fn != 'Base'):
            super().restore(fn)
            if self._normalize_amp_input:
                checkpoint = torch_ext.load_checkpoint(fn)
                self._amp_input_mean_std.load_state_dict(checkpoint['amp_input_mean_std'])
        return

    def _build_net(self, config):
        super()._build_net(config)

        if self._normalize_amp_input:
            self._amp_input_mean_std = RunningMeanStd(config['amp_input_shape']).to(self.device)
            self._amp_input_mean_std.eval()

        return

    def _post_step(self, info):
        super()._post_step(info)
        # Only print discriminator diagnostics when a viewer window is attached.
        if (self.env.task.viewer):
            self._amp_debug(info)
        return

    def _build_net_config(self):
        config = super()._build_net_config()
        if (hasattr(self, 'env')) and self.env is not None:
            config['amp_input_shape'] = self.env.amp_observation_space.shape
        else:
            config['amp_input_shape'] = self.env_info['amp_observation_space']

        return config

    def _amp_debug(self, info):
        """Print discriminator prediction and reward for env 0 (viewer-only debug aid)."""
        with torch.no_grad():
            amp_obs = info['amp_obs']
            amp_obs = amp_obs[0:1]
            disc_pred = self._eval_disc(amp_obs)
            amp_rewards = self._calc_amp_rewards(amp_obs)
            disc_reward = amp_rewards['disc_rewards']

            disc_pred = disc_pred.detach().cpu().numpy()[0, 0]
            disc_reward = disc_reward.cpu().numpy()[0, 0]
            print("disc_pred: ", disc_pred, disc_reward)
        return

    def _preproc_amp_obs(self, amp_obs):
        if self._normalize_amp_input:
            amp_obs = self._amp_input_mean_std(amp_obs)
        return amp_obs

    def _eval_disc(self, amp_obs):
        proc_amp_obs = self._preproc_amp_obs(amp_obs)
        return self.model.a2c_network.eval_disc(proc_amp_obs)

    def _calc_amp_rewards(self, amp_obs):
        disc_r = self._calc_disc_rewards(amp_obs)
        output = {
            'disc_rewards': disc_r
        }
        return output

    def _calc_disc_rewards(self, amp_obs):
        """Style reward: -log(max(1 - sigmoid(D(amp_obs)), 1e-4)) * disc_reward_scale."""
        with torch.no_grad():
            disc_logits = self._eval_disc(amp_obs)
            prob = 1 / (1 + torch.exp(-disc_logits))
            # Clamp away from zero so the log stays finite when the disc saturates.
            disc_r = -torch.log(torch.maximum(1 - prob, torch.tensor(0.0001, device=self.device)))
            disc_r *= self._disc_reward_scale
        return disc_r

================================================ FILE: timechamber/ase/utils/common_agent.py ================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import copy
from datetime import datetime
from gym import spaces
import numpy as np
import os
import time
import yaml

from rl_games.algos_torch import a2c_continuous
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import central_value
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common import a2c_common
from rl_games.common import datasets
from rl_games.common import schedulers
from rl_games.common import vecenv

import torch
from torch import optim

import timechamber.ase.utils.amp_datasets as amp_datasets
from timechamber.utils.utils import load_check, load_checkpoint

from tensorboardX import SummaryWriter


class CommonAgent(a2c_continuous.A2CAgent):
    """Shared PPO training loop for the ASE/AMP agents in this package."""

    def __init__(self, base_name, params):
        # Deliberately skip A2CAgent.__init__ and call A2CBase directly: this class
        # rebuilds the model, optimizer, and dataset itself with AMP-specific pieces.
        a2c_common.A2CBase.__init__(self, base_name, params)

        self.config = config = params['config']
        self._load_config_params(config)

        self.is_discrete = False
        self._setup_action_space()
        self.bounds_loss_coef = config.get('bounds_loss_coef', None)
        self.clip_actions = config.get('clip_actions', True)
        self._save_intermediate = config.get('save_intermediate', False)

        net_config = self._build_net_config()
        self.model = self.network.build(net_config)
        self.model.to(self.ppo_device)
        self.states = None

        self.init_rnn_from_model(self.model)

        self.last_lr = float(self.last_lr)

        self.optimizer = optim.Adam(self.model.parameters(), float(self.last_lr), eps=1e-08, weight_decay=self.weight_decay)

        if self.normalize_input:
            obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
            self.running_mean_std = RunningMeanStd(obs_shape).to(self.ppo_device)

        if self.normalize_value:
            self.value_mean_std = self.central_value_net.model.value_mean_std if self.has_central_value else self.model.value_mean_std

        if self.has_central_value:
            cv_config = {
                'state_shape' : torch_ext.shape_whc_to_cwh(self.state_shape),
                'value_size' : self.value_size,
                'ppo_device' : self.ppo_device,
                'num_agents' : self.num_agents,
                'horizon_length' : self.horizon_length,
                'num_actors' : self.num_actors,
                'num_actions' : self.actions_num,
                'seq_len' : self.seq_len,
                'model' : self.central_value_config['network'],
                'config' : self.central_value_config,
                # NOTE: 'writter' is the (misspelled) key the rl_games central-value
                # trainer expects -- do not "fix" the spelling.
                'writter' : self.writer,
                'multi_gpu' : self.multi_gpu
            }
            self.central_value_net = central_value.CentralValueTrain(**cv_config).to(self.ppo_device)
            self.use_experimental_cv = self.config.get('use_experimental_cv', True)

        self.dataset = amp_datasets.AMPDataset(self.batch_size, self.minibatch_size, self.is_discrete, self.is_rnn, self.ppo_device, self.seq_len)

        self.algo_observer.after_init(self)

        return

    def init_tensors(self):
        super().init_tensors()
        # Extra rollout storage for bootstrapping with explicit next-state values.
        self.experience_buffer.tensor_dict['next_obses'] = torch.zeros_like(self.experience_buffer.tensor_dict['obses'])
        self.experience_buffer.tensor_dict['next_values'] = torch.zeros_like(self.experience_buffer.tensor_dict['values'])

        self.tensor_list += ['next_obses']
        return

    def train(self):
        """Main training loop: alternate rollout collection and PPO updates until max_epochs."""
        self.init_tensors()
        self.last_mean_rewards = -100500
        start_time = time.time()
        total_time = 0
        rep_count = 0
        self.frame = 0
        self.obs = self.env_reset()
        self.curr_frames = self.batch_size_envs

        model_output_file = os.path.join(self.nn_dir, self.config['name'])

        if self.multi_gpu:
            self.hvd.setup_algo(self)

        self._init_train()

        while True:
            epoch_num = self.update_epoch()
            train_info = self.train_epoch()

            sum_time = train_info['total_time']
            total_time += sum_time
            frame = self.frame
            if self.multi_gpu:
                self.hvd.sync_stats(self)

            # Logging / checkpointing happens on rank 0 only.
            if self.rank == 0:
                scaled_time = sum_time
                scaled_play_time = train_info['play_time']
                curr_frames = self.curr_frames
                self.frame += curr_frames
                if self.print_stats:
                    fps_step = curr_frames / scaled_play_time
                    fps_total = curr_frames / scaled_time
                    print(f'fps step: {fps_step:.1f} fps total: {fps_total:.1f}')

                self.writer.add_scalar('performance/total_fps', curr_frames / scaled_time, frame)
                self.writer.add_scalar('performance/step_fps', curr_frames / scaled_play_time, frame)
                self.writer.add_scalar('info/epochs', epoch_num, frame)
                self._log_train_info(train_info, frame)
self.algo_observer.after_print_stats(frame, epoch_num, total_time) if self.game_rewards.current_size > 0: mean_rewards = self._get_mean_rewards() mean_lengths = self.game_lengths.get_mean() for i in range(self.value_size): self.writer.add_scalar('rewards{0}/frame'.format(i), mean_rewards[i], frame) self.writer.add_scalar('rewards{0}/iter'.format(i), mean_rewards[i], epoch_num) self.writer.add_scalar('rewards{0}/time'.format(i), mean_rewards[i], total_time) self.writer.add_scalar('episode_lengths/frame', mean_lengths, frame) self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num) if self.has_self_play_config: self.self_play_manager.update(self) if self.save_freq > 0: if (epoch_num % self.save_freq == 0): self.save(model_output_file) if (self._save_intermediate): int_model_output_file = model_output_file + '_' + str(epoch_num).zfill(8) self.save(int_model_output_file) if epoch_num > self.max_epochs: self.save(model_output_file) print('MAX EPOCHS NUM!') return self.last_mean_rewards, epoch_num update_time = 0 return def set_full_state_weights(self, weights): self.set_weights(weights) self.epoch_num = weights['epoch'] if self.has_central_value: self.central_value_net.load_state_dict(weights['assymetric_vf_nets']) self.optimizer.load_state_dict(weights['optimizer']) self.frame = weights.get('frame', 0) self.last_mean_rewards = weights.get('last_mean_rewards', -100500) if self.vec_env is not None: env_state = weights.get('env_state', None) self.vec_env.set_env_state(env_state) return def restore(self, fn): checkpoint = load_checkpoint(fn, device=self.device) checkpoint = load_check(checkpoint=checkpoint, normalize_input=self.normalize_input, normalize_value=self.normalize_value) self.set_full_state_weights(checkpoint) def train_epoch(self): play_time_start = time.time() with torch.no_grad(): if self.is_rnn: batch_dict = self.play_steps_rnn() else: batch_dict = self.play_steps() play_time_end = time.time() update_time_start = time.time() rnn_masks = 
batch_dict.get('rnn_masks', None) self.set_train() self.curr_frames = batch_dict.pop('played_frames') self.prepare_dataset(batch_dict) self.algo_observer.after_steps() if self.has_central_value: self.train_central_value() train_info = None if self.is_rnn: frames_mask_ratio = rnn_masks.sum().item() / (rnn_masks.nelement()) print(frames_mask_ratio) for _ in range(0, self.mini_epochs_num): ep_kls = [] for i in range(len(self.dataset)): curr_train_info = self.train_actor_critic(self.dataset[i]) if self.schedule_type == 'legacy': if self.multi_gpu: curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls') self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, curr_train_info['kl'].item()) self.update_lr(self.last_lr) if (train_info is None): train_info = dict() for k, v in curr_train_info.items(): train_info[k] = [v] else: for k, v in curr_train_info.items(): train_info[k].append(v) av_kls = torch_ext.mean_list(train_info['kl']) if self.schedule_type == 'standard': if self.multi_gpu: av_kls = self.hvd.average_value(av_kls, 'ep_kls') self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item()) self.update_lr(self.last_lr) if self.schedule_type == 'standard_epoch': if self.multi_gpu: av_kls = self.hvd.average_value(torch_ext.mean_list(kls), 'ep_kls') self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item()) self.update_lr(self.last_lr) update_time_end = time.time() play_time = play_time_end - play_time_start update_time = update_time_end - update_time_start total_time = update_time_end - play_time_start train_info['step_time'] = batch_dict['step_time'] train_info['play_time'] = play_time train_info['update_time'] = update_time train_info['total_time'] = total_time self._record_train_batch_info(batch_dict, train_info) return train_info def play_steps(self): self.set_eval() 
# --- body of CommonAgent.play_steps() (method header above this chunk) ---
        epinfos = []
        done_indices = []
        update_list = self.update_list

        for n in range(self.horizon_length):
            # Reset only the envs that finished on the previous step.
            self.obs = self.env_reset(done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])

            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                res_dict = self.get_action_values(self.obs)

            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])

            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])

            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
            shaped_rewards = self.rewards_shaper(rewards)
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)

            # Zero the bootstrap value for true terminations (vs. time-outs) so
            # GAE does not bootstrap past the end of an episode.
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            next_vals = self._eval_critic(self.obs)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)

            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            # Keep one index per environment (stride over per-env agents).
            done_indices = all_done_indices[::self.num_agents]

            self.game_rewards.update(self.current_rewards[done_indices])
            self.game_lengths.update(self.current_lengths[done_indices])
            self.algo_observer.process_infos(infos, done_indices)

            not_dones = 1.0 - self.dones.float()

            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones

            done_indices = done_indices[:, 0]

        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']

        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values

        batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size

        return batch_dict

    def prepare_dataset(self, batch_dict):
        """Normalize values/returns, compute advantages, and load the rollout into the PPO dataset."""
        obses = batch_dict['obses']
        returns = batch_dict['returns']
        dones = batch_dict['dones']
        values = batch_dict['values']
        actions = batch_dict['actions']
        neglogpacs = batch_dict['neglogpacs']
        mus = batch_dict['mus']
        sigmas = batch_dict['sigmas']
        rnn_states = batch_dict.get('rnn_states', None)
        rnn_masks = batch_dict.get('rnn_masks', None)

        advantages = self._calc_advs(batch_dict)

        if self.normalize_value:
            # Update running stats on this batch, then freeze for the PPO passes.
            self.value_mean_std.train()
            values = self.value_mean_std(values)
            returns = self.value_mean_std(returns)
            self.value_mean_std.eval()

        dataset_dict = {}
        dataset_dict['old_values'] = values
        dataset_dict['old_logp_actions'] = neglogpacs
        dataset_dict['advantages'] = advantages
        dataset_dict['returns'] = returns
        dataset_dict['actions'] = actions
        dataset_dict['obs'] = obses
        dataset_dict['rnn_states'] = rnn_states
        dataset_dict['rnn_masks'] = rnn_masks
        dataset_dict['mu'] = mus
        dataset_dict['sigma'] = sigmas

        self.dataset.update_values_dict(dataset_dict)

        if self.has_central_value:
            dataset_dict = {}
            dataset_dict['old_values'] = values
            dataset_dict['advantages'] = advantages
            dataset_dict['returns'] = returns
            dataset_dict['actions'] = actions
            dataset_dict['obs'] = batch_dict['states']
            dataset_dict['rnn_masks'] = rnn_masks
            self.central_value_net.update_dataset(dataset_dict)

        return

    def calc_gradients(self, input_dict):
        """One PPO minibatch update: forward pass, losses, backward, optimizer step."""
        self.set_train()

        value_preds_batch = input_dict['old_values']
        old_action_log_probs_batch = input_dict['old_logp_actions']
        advantage = input_dict['advantages']
        old_mu_batch = input_dict['mu']
        old_sigma_batch = input_dict['sigma']
        return_batch = input_dict['returns']
        actions_batch = input_dict['actions']
        obs_batch = input_dict['obs']
        obs_batch = self._preproc_obs(obs_batch)

        lr = self.last_lr
        kl = 1.0
        lr_mul = 1.0
# --- continuation of CommonAgent.calc_gradients() (method header above this chunk) ---
        curr_e_clip = lr_mul * self.e_clip
        batch_dict = {
            'is_train': True,
            'prev_actions': actions_batch,
            'obs' : obs_batch
        }

        rnn_masks = None
        if self.is_rnn:
            rnn_masks = input_dict['rnn_masks']
            batch_dict['rnn_states'] = input_dict['rnn_states']
            batch_dict['seq_length'] = self.seq_len

        with torch.cuda.amp.autocast(enabled=self.mixed_precision):
            res_dict = self.model(batch_dict)
            action_log_probs = res_dict['prev_neglogp']
            values = res_dict['values']
            entropy = res_dict['entropy']
            mu = res_dict['mus']
            sigma = res_dict['sigmas']

            a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip)
            a_loss = a_info['actor_loss']

            c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value)
            c_loss = c_info['critic_loss']

            b_loss = self.bound_loss(mu)

            a_loss = torch.mean(a_loss)
            c_loss = torch.mean(c_loss)
            b_loss = torch.mean(b_loss)
            entropy = torch.mean(entropy)

            # Combined PPO objective: surrogate + value + entropy bonus + action-bound penalty.
            loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss

            a_clip_frac = torch.mean(a_info['actor_clipped'].float())

            a_info['actor_loss'] = a_loss
            a_info['actor_clip_frac'] = a_clip_frac

        if self.multi_gpu:
            self.optimizer.zero_grad()
        else:
            # Setting grads to None is cheaper than zeroing them.
            for param in self.model.parameters():
                param.grad = None

        self.scaler.scale(loss).backward()
        self.scaler.step(self.optimizer)
        self.scaler.update()

        with torch.no_grad():
            reduce_kl = not self.is_rnn
            kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl)

        self.train_result = {
            'entropy': entropy,
            'kl': kl_dist,
            'last_lr': self.last_lr,
            'lr_mul': lr_mul,
            'b_loss': b_loss
        }
        self.train_result.update(a_info)
        self.train_result.update(c_info)

        return

    def discount_values(self, mb_fdones, mb_values, mb_rewards, mb_next_values):
        """GAE using explicit next-state values; self.tau is the GAE lambda."""
        lastgaelam = 0
        mb_advs = torch.zeros_like(mb_rewards)

        for t in reversed(range(self.horizon_length)):
            not_done = 1.0 - mb_fdones[t]
            not_done = not_done.unsqueeze(1)

            delta = mb_rewards[t] + self.gamma * mb_next_values[t] - mb_values[t]
            lastgaelam = delta + self.gamma * self.tau * not_done * lastgaelam
            mb_advs[t] = lastgaelam

        return mb_advs

    def env_reset(self, env_ids=None):
        obs = self.vec_env.reset(env_ids)
        obs = self.obs_to_tensors(obs)
        return obs

    def bound_loss(self, mu):
        """Quadratic penalty for action means outside [-soft_bound, soft_bound]."""
        if self.bounds_loss_coef is not None:
            soft_bound = 1.0
            mu_loss_high = torch.clamp_min(mu - soft_bound, 0.0)**2
            mu_loss_low = torch.clamp_max(mu + soft_bound, 0.0)**2
            b_loss = (mu_loss_low + mu_loss_high).sum(axis=-1)
        else:
            b_loss = 0
        return b_loss

    def _get_mean_rewards(self):
        return self.game_rewards.get_mean()

    def _load_config_params(self, config):
        self.last_lr = config['learning_rate']
        return

    def _build_net_config(self):
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        config = {
            'actions_num' : self.actions_num,
            'input_shape' : obs_shape,
            'num_seqs' : self.num_actors * self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value' : self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        return config

    def _setup_action_space(self):
        action_space = self.env_info['action_space']
        self.actions_num = action_space.shape[0]

        # todo introduce device instead of cuda()
        self.actions_low = torch.from_numpy(action_space.low.copy()).float().to(self.ppo_device)
        self.actions_high = torch.from_numpy(action_space.high.copy()).float().to(self.ppo_device)
        return

    def _init_train(self):
        # Hook for subclasses; base class does nothing before the training loop.
        return

    def _eval_critic(self, obs_dict):
        """Value estimate for the current observation (eval mode, no grad tracking by caller)."""
        self.model.eval()
        obs = obs_dict['obs']
        processed_obs = self._preproc_obs(obs)
        value = self.model.eval_critic(processed_obs)
        return value

    def _actor_loss(self, old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip):
        """Clipped PPO surrogate loss plus clip-fraction diagnostics."""
        ratio = torch.exp(old_action_log_probs_batch - action_log_probs)
        surr1 = advantage * ratio
        surr2 = advantage * torch.clamp(ratio, 1.0 - curr_e_clip, 1.0 + curr_e_clip)
        a_loss = torch.max(-surr1, -surr2)

        clipped = torch.abs(ratio - 1.0) > curr_e_clip
        clipped = clipped.detach()

        info = {
            'actor_loss': a_loss,
            'actor_clipped': clipped.detach()
        }
        return info

    def _critic_loss(self, value_preds_batch, values, curr_e_clip, return_batch, clip_value):
        """Squared-error value loss, optionally clipped around the old value prediction."""
        if clip_value:
            value_pred_clipped = value_preds_batch + \
                (values - value_preds_batch).clamp(-curr_e_clip, curr_e_clip)
            value_losses = (values - return_batch)**2
            value_losses_clipped = (value_pred_clipped - return_batch)**2
            c_loss = torch.max(value_losses, value_losses_clipped)
        else:
            c_loss = (return_batch - values)**2

        info = {
            'critic_loss': c_loss
        }
        return info

    def _calc_advs(self, batch_dict):
        """Advantages = returns - values, summed over value heads and (optionally) normalized."""
        returns = batch_dict['returns']
        values = batch_dict['values']

        advantages = returns - values
        advantages = torch.sum(advantages, axis=1)
        if self.normalize_advantage:
            advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
        return advantages

    def _record_train_batch_info(self, batch_dict, train_info):
        # Hook for subclasses (e.g. to record AMP-specific batch statistics).
        return

    def _log_train_info(self, train_info, frame):
        """Write per-epoch training scalars to TensorBoard."""
        self.writer.add_scalar('performance/update_time', train_info['update_time'], frame)
        self.writer.add_scalar('performance/play_time', train_info['play_time'], frame)
        self.writer.add_scalar('losses/a_loss', torch_ext.mean_list(train_info['actor_loss']).item(), frame)
        self.writer.add_scalar('losses/c_loss', torch_ext.mean_list(train_info['critic_loss']).item(), frame)
        self.writer.add_scalar('losses/bounds_loss', torch_ext.mean_list(train_info['b_loss']).item(), frame)
        self.writer.add_scalar('losses/entropy', torch_ext.mean_list(train_info['entropy']).item(), frame)
        self.writer.add_scalar('info/last_lr', train_info['last_lr'][-1] * train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/lr_mul', train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/e_clip', self.e_clip * train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/clip_frac', torch_ext.mean_list(train_info['actor_clip_frac']).item(), frame)
        self.writer.add_scalar('info/kl', torch_ext.mean_list(train_info['kl']).item(), frame)
        return

================================================ FILE: timechamber/ase/utils/common_player.py
================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import time  # BUGFIX: run() calls time.sleep() when rendering, but 'time' was never imported

import torch

from rl_games.algos_torch import players
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common.player import BasePlayer

from timechamber.utils.utils import load_check, load_checkpoint

import numpy as np


class CommonPlayer(players.PpoPlayerContinuous):
    """Generic evaluation player: builds its own network and runs inference episodes."""

    def __init__(self, params):
        config = params['config']
        # Deliberately skip PpoPlayerContinuous.__init__: this class builds its
        # own network below via _build_net_config()/_build_net().
        BasePlayer.__init__(self, params)
        self.network = config['network']

        self._setup_action_space()
        self.mask = [False]

        self.normalize_input = self.config['normalize_input']
        self.normalize_value = self.config.get('normalize_value', False)

        net_config = self._build_net_config()
        self._build_net(net_config)

        return

    def run(self):
        """Play n_games evaluation games and print per-game and aggregate stats."""
        n_games = self.games_num
        render = self.render_env
        n_game_life = self.n_game_life
        is_determenistic = self.is_determenistic
        sum_rewards = 0
        sum_steps = 0
        sum_game_res = 0
        n_games = n_games * n_game_life
        games_played = 0
        has_masks = False
        has_masks_func = getattr(self.env, "has_action_mask", None) is not None
        # (dead `op_agent`/`agent_inited` locals from upstream removed -- they were never read)

        if has_masks_func:
            has_masks = self.env.has_action_mask()

        need_init_rnn = self.is_rnn
        # BUGFIX: initialize before the loop; previously this was first assigned
        # inside the loop body and could be referenced unbound after it.
        print_game_res = False
        for _ in range(n_games):
            if games_played >= n_games:
                break

            obs_dict = self.env_reset()
            batch_size = 1
            batch_size = self.get_batch_size(obs_dict['obs'], batch_size)

            if need_init_rnn:
                self.init_rnn()
                need_init_rnn = False

            cr = torch.zeros(batch_size, dtype=torch.float32, device=self.device)
            steps = torch.zeros(batch_size, dtype=torch.float32, device=self.device)

            print_game_res = False

            done_indices = []

            for n in range(self.max_steps):
                # obs_dict = self.env_reset(done_indices)
                if has_masks:
                    masks = self.env.get_action_mask()
                    action = self.get_masked_action(obs_dict, masks, is_determenistic)
                else:
                    action = self.get_action(obs_dict, is_determenistic)
                obs_dict, r, done, info = self.env_step(self.env, action)
                obs_dict = {'obs': obs_dict}
                cr += r
                steps += 1

                self._post_step(info)

                if render:
                    self.env.render(mode='human')
                    time.sleep(self.render_sleep)

                all_done_indices = done.nonzero(as_tuple=False)
                # One index per environment (stride over per-env agents).
                done_indices = all_done_indices[::self.num_agents]
                done_count = len(done_indices)
                games_played += done_count

                if done_count > 0:
                    if self.is_rnn:
                        # Clear recurrent state of finished environments.
                        for s in self.states:
                            s[:, all_done_indices, :] = s[:, all_done_indices, :] * 0.0

                    cur_rewards = cr[done_indices].sum().item()
                    cur_steps = steps[done_indices].sum().item()

                    cr = cr * (1.0 - done.float())
                    steps = steps * (1.0 - done.float())
                    sum_rewards += cur_rewards
                    sum_steps += cur_steps

                    game_res = 0.0
                    if isinstance(info, dict):
                        if 'battle_won' in info:
                            print_game_res = True
                            game_res = info.get('battle_won', 0.5)
                        if 'scores' in info:
                            print_game_res = True
                            game_res = info.get('scores', 0.5)
                    if self.print_stats:
                        if print_game_res:
                            print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count, 'w:', game_res)
                        else:
                            print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count)

                    sum_game_res += game_res
                    if batch_size // self.num_agents == 1 or games_played >= n_games:
                        break

                done_indices = done_indices[:, 0]

        print(sum_rewards)
        # NOTE(review): games_played can be 0 if no game finished within
        # max_steps, which would make the averages below divide by zero.
        if print_game_res:
            print('av reward:', sum_rewards / games_played * n_game_life,
                  'av steps:', sum_steps / games_played * n_game_life,
                  'winrate:', sum_game_res / games_played * n_game_life)
        else:
            print('av reward:', sum_rewards / games_played * n_game_life,
                  'av steps:', sum_steps / games_played * n_game_life)

        return

    def get_action(self, obs_dict, is_determenistic=False):
        """Unwrap the obs dict and delegate to the base player's get_action."""
        output = super().get_action(obs_dict['obs'], is_determenistic)
        return output

    def env_step(self, env, actions):
        """Step the env, coercing numpy/scalar results to torch tensors on self.device."""
        if not self.is_tensor_obses:
            actions = actions.cpu().numpy()
        obs, rewards, dones, infos = env.step(actions)

        if hasattr(obs, 'dtype') and obs.dtype == np.float64:
            obs = np.float32(obs)
        if self.value_size > 1:
            rewards = rewards[0]
        if self.is_tensor_obses:
            return obs, rewards.to(self.device), dones.to(self.device), infos
        else:
            if np.isscalar(dones):
                rewards = np.expand_dims(np.asarray(rewards), 0)
                dones = np.expand_dims(np.asarray(dones), 0)
            return self.obs_to_torch(obs), torch.from_numpy(rewards), torch.from_numpy(dones), infos

    def _build_net(self, config):
        """Build the inference network and (optionally) the input normalizer."""
        self.model = self.network.build(config)
        self.model.to(self.device)
        self.model.eval()
        self.is_rnn = self.model.is_rnn()
        if self.normalize_input:
            obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
            self.running_mean_std = RunningMeanStd(obs_shape).to(self.device)
            self.running_mean_std.eval()
        return

    def env_reset(self, env_ids=None):
        obs = self.env.reset(env_ids)
        return self.obs_to_torch(obs)

    def _post_step(self, info):
        # Hook for subclasses (e.g. AMP discriminator debugging).
        return

    def _build_net_config(self):
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        config = {
            'actions_num' : self.actions_num,
            'input_shape' : obs_shape,
            'num_seqs' : self.num_agents,
            'normalize_input': self.normalize_input,
            'normalize_value' : self.normalize_value,
        }
        return config

    def restore(self, fn):
        """Load model weights (and input-normalizer state, if present) from a checkpoint."""
        checkpoint = load_checkpoint(fn, device=self.device)
        checkpoint = load_check(checkpoint=checkpoint, normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        self.model.load_state_dict(checkpoint['model'])
        if self.normalize_input and 'running_mean_std' in checkpoint:
            self.model.running_mean_std.load_state_dict(checkpoint['running_mean_std'])

    def _setup_action_space(self):
        self.actions_num = self.action_space.shape[0]
        self.actions_low = torch.from_numpy(self.action_space.low.copy()).float().to(self.device)
        self.actions_high = torch.from_numpy(self.action_space.high.copy()).float().to(self.device)
        return

# ================================================
# FILE: timechamber/ase/utils/replay_buffer.py
# ================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1.
Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import torch class ReplayBuffer(): def __init__(self, buffer_size, device): self._head = 0 self._total_count = 0 self._buffer_size = buffer_size self._device = device self._data_buf = None self._sample_idx = torch.randperm(buffer_size) self._sample_head = 0 return def reset(self): self._head = 0 self._total_count = 0 self._reset_sample_idx() return def get_buffer_size(self): return self._buffer_size def get_total_count(self): return self._total_count def store(self, data_dict): if (self._data_buf is None): self._init_data_buf(data_dict) n = next(iter(data_dict.values())).shape[0] buffer_size = self.get_buffer_size() assert(n <= buffer_size) for key, curr_buf in self._data_buf.items(): curr_n = data_dict[key].shape[0] assert(n == curr_n) store_n = min(curr_n, buffer_size - self._head) curr_buf[self._head:(self._head + store_n)] = data_dict[key][:store_n] remainder = n - store_n if (remainder > 0): curr_buf[0:remainder] = data_dict[key][store_n:] self._head = (self._head + n) % buffer_size self._total_count += n return def sample(self, n): total_count = self.get_total_count() buffer_size = self.get_buffer_size() idx = torch.arange(self._sample_head, self._sample_head + n) idx = idx % buffer_size rand_idx = self._sample_idx[idx] if (total_count < buffer_size): rand_idx = rand_idx % self._head samples = dict() for k, v in self._data_buf.items(): samples[k] = v[rand_idx] self._sample_head += n if (self._sample_head >= buffer_size): self._reset_sample_idx() return samples def _reset_sample_idx(self): buffer_size = self.get_buffer_size() self._sample_idx[:] = torch.randperm(buffer_size) self._sample_head = 0 return def _init_data_buf(self, data_dict): buffer_size = self.get_buffer_size() self._data_buf = dict() for k, v in data_dict.items(): v_shape = v.shape[1:] self._data_buf[k] = torch.zeros((buffer_size,) + v_shape, device=self._device) return ================================================ FILE: timechamber/cfg/config.yaml 
================================================
# Top-level Hydra launch config: device/sim selection, checkpointing,
# self-play and logging options. The concrete task and train configs are
# composed in via the `defaults` list at the bottom of this file.

# Task name - used to pick the class to load
task_name: ${task.name}
# experiment name. defaults to name of training config
experiment: ''

# if set to positive integer, overrides the default number of environments
num_envs: ''

# seed - set to -1 to choose random seed
seed: 42
# set to True for deterministic performance
torch_deterministic: False

# set the maximum number of learning iterations to train for. overrides default per-environment setting
max_iterations: ''

# set minibatch_size
minibatch_size: 32768

## Device config
# 'physx' or 'flex'
physics_engine: 'physx'
# whether to use cpu or gpu pipeline
pipeline: 'gpu'
use_gpu: True
use_gpu_pipeline: True
# device for running physics simulation
sim_device: 'cuda:0'
# device to run RL
rl_device: 'cuda:0'
graphics_device_id: 0
device_type: cuda

## PhysX arguments
num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only.
solver_type: 1 # 0: pgs, 1: tgs
num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread

# RLGames Arguments
# test - if set, run policy in inference mode (requires setting checkpoint to load)
test: False
# used to set checkpoint path
checkpoint: ''
# checkpoint for the opponent policy; the train configs fall back to
# `checkpoint` when this is left empty (see op_load_path there)
op_checkpoint: ''
# opponent-pool variant forwarded to the self-play trainer via the train
# configs' player_pool_type; '' selects the trainer's default
player_pool_type: ''
# number of competing agents per environment (task configs read this as numAgents)
num_agents: 2

# HRL Arguments
motion_file: 'tasks/data/motions/reallusion_sword_shield/RL_Avatar_Idle_Ready_Motion.npy'

# set to True to use multi-gpu horovod training
multi_gpu: False

wandb_activate: False
wandb_group: ''
wandb_name: ${train.params.config.name}
wandb_entity: ''
wandb_project: 'timechamber'

capture_video: False
capture_video_freq: 1464
capture_video_len: 100
force_render: True

# disables rendering
headless: True

# set default task and default training config based on task
defaults:
  - task: MA_Humanoid_Strike
  - train: ${task}HRL
  - hydra/job_logging: disabled

# set the directory where the output files get saved
hydra:
  output_subdir: null
  run:
    dir: .
================================================ FILE: timechamber/cfg/task/MA_Ant_Battle.yaml ================================================ # used to create the object name: MA_Ant_Battle physics_engine: ${..physics_engine} # if given, will override the device setting in gym. env: # numEnvs: ${...num_envs} numEnvs: ${resolve_default:4096,${...num_envs}} numAgents: ${...num_agents} # rgb color of Ant body color: [ [ 0.97, 0.38, 0.06 ],[ 0.24, 0.38, 0.06 ],[ 0.56, 0.85, 0.25 ],[ 0.56, 0.85, 0.25 ],[ 0.14, 0.97, 0.24 ],[ 0.63, 0.2, 0.87 ] ] envSpacing: 6 borderlineSpace: 3 episodeLength: 1000 enableDebugVis: False controlFrequencyInv: 1 clipActions: 1.0 clipObservations: 5.0 actionScale: 0.5 control: # PD Drive parameters: stiffness: 85.0 # [N*m/rad] damping: 2.0 # [N*m*s/rad] actionScale: 0.5 controlFrequencyInv: 1 # 60 Hz # reward parameters headingWeight: 0.5 upWeight: 0.1 # cost parameters terminationHeight: 0.31 dofVelocityScale: 0.2 jointsAtLimitCost: -0.1 plane: staticFriction: 1.0 dynamicFriction: 1.0 restitution: 0.0 asset: assetFileName: "mjcf/nv_ant.xml" # set to True if you use camera sensors in the environment enableCameraSensors: False sim: dt: 0.0166 # 1/60 s substeps: 2 up_axis: "z" use_gpu_pipeline: ${eq:${...pipeline},"gpu"} gravity: [ 0.0, 0.0, -9.81 ] physx: num_threads: ${....num_threads} solver_type: ${....solver_type} use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU num_position_iterations: 4 num_velocity_iterations: 0 contact_offset: 0.02 rest_offset: 0.0 bounce_threshold_velocity: 0.2 max_depenetration_velocity: 10.0 default_buffer_size_multiplier: 5.0 max_gpu_contact_pairs: 8388608 # 8*1024*1024 num_subscenes: ${....num_subscenes} contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (default - all contacts) task: randomize: False randomization_params: # specify which attributes to randomize for each actor type and property 
frequency: 600 # Define how many environment steps between generating new randomizations observations: range: [ 0, .002 ] # range for the white noise operation: "additive" distribution: "gaussian" actions: range: [ 0., .02 ] operation: "additive" distribution: "gaussian" actor_params: ant: color: True rigid_body_properties: mass: range: [ 0.5, 1.5 ] operation: "scaling" distribution: "uniform" setup_only: True # Property will only be randomized once before simulation is started. See Domain Randomization Documentation for more info. dof_properties: damping: range: [ 0.5, 1.5 ] operation: "scaling" distribution: "uniform" stiffness: range: [ 0.5, 1.5 ] operation: "scaling" distribution: "uniform" lower: range: [ 0, 0.01 ] operation: "additive" distribution: "gaussian" upper: range: [ 0, 0.01 ] operation: "additive" distribution: "gaussian" ================================================ FILE: timechamber/cfg/task/MA_Ant_Sumo.yaml ================================================ # used to create the object name: MA_Ant_Sumo physics_engine: ${..physics_engine} # if given, will override the device setting in gym. 
env: # numEnvs: ${...num_envs} numEnvs: ${resolve_default:4096,${...num_envs}} numAgents: ${...num_agents} envSpacing: 6 borderlineSpace: 3 episodeLength: 1000 enableDebugVis: False controlFrequencyInv: 1 clipActions: 1.0 clipObservations: 5.0 actionScale: 0.5 control: # PD Drive parameters: stiffness: 85.0 # [N*m/rad] damping: 2.0 # [N*m*s/rad] actionScale: 0.5 controlFrequencyInv: 1 # 60 Hz # reward parameters headingWeight: 0.5 upWeight: 0.1 # cost parameters terminationHeight: 0.31 dofVelocityScale: 0.2 jointsAtLimitCost: -0.1 plane: staticFriction: 1.0 dynamicFriction: 1.0 restitution: 0.0 asset: assetFileName: "mjcf/nv_ant.xml" # set to True if you use camera sensors in the environment enableCameraSensors: False sim: dt: 0.0166 # 1/60 s substeps: 2 up_axis: "z" use_gpu_pipeline: ${eq:${...pipeline},"gpu"} gravity: [0.0, 0.0, -9.81] physx: num_threads: ${....num_threads} solver_type: ${....solver_type} use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU num_position_iterations: 4 num_velocity_iterations: 0 contact_offset: 0.02 rest_offset: 0.0 bounce_threshold_velocity: 0.2 max_depenetration_velocity: 10.0 default_buffer_size_multiplier: 5.0 max_gpu_contact_pairs: 8388608 # 8*1024*1024 num_subscenes: ${....num_subscenes} contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (default - all contacts) task: randomize: False randomization_params: # specify which attributes to randomize for each actor type and property frequency: 600 # Define how many environment steps between generating new randomizations observations: range: [0, .002] # range for the white noise operation: "additive" distribution: "gaussian" actions: range: [0., .02] operation: "additive" distribution: "gaussian" actor_params: ant: color: True rigid_body_properties: mass: range: [0.5, 1.5] operation: "scaling" distribution: "uniform" setup_only: True # Property will only be randomized 
once before simulation is started. See Domain Randomization Documentation for more info. dof_properties: damping: range: [0.5, 1.5] operation: "scaling" distribution: "uniform" stiffness: range: [0.5, 1.5] operation: "scaling" distribution: "uniform" lower: range: [0, 0.01] operation: "additive" distribution: "gaussian" upper: range: [0, 0.01] operation: "additive" distribution: "gaussian" ================================================ FILE: timechamber/cfg/task/MA_Humanoid_Strike.yaml ================================================ name: MA_Humanoid_Strike physics_engine: ${..physics_engine} # if given, will override the device setting in gym. env: numEnvs: ${resolve_default:4096,${...num_envs}} envSpacing: 6 episodeLength: 1500 borderlineSpace: 3.0 numAgents: 2 isFlagrun: False enableDebugVis: False pdControl: True powerScale: 1.0 controlFrequencyInv: 2 # 30 Hz stateInit: "Default" hybridInitProb: 0.5 numAMPObsSteps: 10 localRootObs: True keyBodies: ["right_hand", "left_hand", "right_foot", "left_foot", "sword", "shield"] contactBodies: ["right_foot", "left_foot"] # forceBodies: ["torso", "right_upper_arm", "right_thigh", "right_shin", "left_thigh", "left_shin"] forceBodies: ["torso", "right_thigh", "right_shin", "left_thigh", "left_shin"] terminationHeight: 0.15 enableEarlyTermination: True strikeBodyNames: ["sword", "shield", "right_hand", "right_lower_arm", "left_hand", "left_lower_arm"] enableTaskObs: True asset: assetRoot: "tasks/data/assets" assetFileName: "mjcf/amp_humanoid_sword_shield.xml" plane: staticFriction: 1.0 dynamicFriction: 1.0 restitution: 0.0 sim: substeps: 2 physx: num_threads: 4 solver_type: 1 # 0: pgs, 1: tgs num_position_iterations: 4 num_velocity_iterations: 0 contact_offset: 0.02 rest_offset: 0.0 bounce_threshold_velocity: 0.2 max_depenetration_velocity: 10.0 default_buffer_size_multiplier: 10.0 flex: num_inner_iterations: 10 warm_start: 0.25 ================================================ FILE: 
timechamber/cfg/train/MA_Ant_BattlePPO.yaml ================================================ params: seed: ${...seed} algo: name: self_play_continuous model: name: continuous_a2c_logstd network: name: actor_critic separate: False space: continuous: mu_activation: None sigma_activation: None mu_init: name: default sigma_init: name: const_initializer val: 0 fixed_sigma: True mlp: units: [ 256, 128, 64 ] activation: elu d2rl: False initializer: name: default player_pool_type: ${...player_pool_type} load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint load_path: ${...checkpoint} # path to the checkpoint to load op_load_path: ${if:${...op_checkpoint},${...op_checkpoint},${...checkpoint}} # default play with myself num_agents: ${...num_agents} update_win_rate: 0.7 player_pool_length: 4 games_to_check: 400 max_update_steps: 5000 device: ${...rl_device} config: name: ${resolve_default:MA_Ant_1v1,${....experiment}} env_name: rlgpu multi_gpu: ${....multi_gpu} ppo: True mixed_precision: False normalize_input: True normalize_value: True value_bootstrap: True num_actors: ${....task.env.numEnvs} reward_shaper: scale_value: 0.01 normalize_advantage: True gamma: 0.99 tau: 0.95 learning_rate: 3e-4 lr_schedule: adaptive schedule_type: standard kl_threshold: 0.008 score_to_win: 20000 max_epochs: ${resolve_default:2000,${....max_iterations}} save_best_after: 200 save_frequency: 1000 grad_norm: 1.0 entropy_coef: 0.0 truncate_grads: True e_clip: 0.2 horizon_length: 64 minibatch_size: ${resolve_default:32768,${....minibatch_size}} mini_epochs: 4 critic_coef: 2 clip_value: True use_smooth_clamp: True bounds_loss_coef: 0.0000 player: games_num: 4000 record_elo: True init_elo: 400 ================================================ FILE: timechamber/cfg/train/MA_Ant_SumoPPO.yaml ================================================ params: seed: ${...seed} algo: name: self_play_continuous model: name: continuous_a2c_logstd network: name: actor_critic 
separate: False space: continuous: mu_activation: None sigma_activation: None mu_init: name: default sigma_init: name: const_initializer val: 0 fixed_sigma: True mlp: units: [ 256, 128, 64 ] activation: elu d2rl: False initializer: name: default # self play agent related player_pool_type: ${...player_pool_type} load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint load_path: ${...checkpoint} # path to the checkpoint to load op_load_path: ${if:${...op_checkpoint},${...op_checkpoint},${...checkpoint}} # default play with myself num_agents: ${...num_agents} update_win_rate: 0.7 player_pool_length: 2 games_to_check: 400 max_update_steps: 5000 device: ${...rl_device} config: name: ${resolve_default:MA_Ant_1v1,${....experiment}} env_name: rlgpu multi_gpu: ${....multi_gpu} ppo: True mixed_precision: False normalize_input: True normalize_value: True value_bootstrap: True num_actors: ${....task.env.numEnvs} reward_shaper: scale_value: 0.01 normalize_advantage: True gamma: 0.99 tau: 0.95 learning_rate: 3e-4 lr_schedule: adaptive schedule_type: standard kl_threshold: 0.008 score_to_win: 20000 max_epochs: ${resolve_default:100000,${....max_iterations}} save_best_after: 200 save_frequency: 500 grad_norm: 1.0 entropy_coef: 0.0 truncate_grads: True e_clip: 0.2 horizon_length: 64 minibatch_size: ${resolve_default:32768,${....minibatch_size}} mini_epochs: 4 critic_coef: 2 clip_value: True use_smooth_clamp: True bounds_loss_coef: 0.0000 player: games_num: 4000 record_elo: True init_elo: 400 ================================================ FILE: timechamber/cfg/train/MA_Humanoid_StrikeHRL.yaml ================================================ params: seed: ${...seed} algo: name: self_play_hrl model: name: hrl network: name: hrl separate: True space: continuous: mu_activation: None sigma_activation: None mu_init: name: default sigma_init: name: const_initializer val: -2.3 fixed_sigma: True learn_sigma: False mlp: units: [1024, 512] 
activation: relu d2rl: False initializer: name: default regularizer: name: None # self play agent related player_pool_type: ${...player_pool_type} load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint load_path: ${...checkpoint} # path to the checkpoint to load op_load_path: ${if:${...op_checkpoint},${...op_checkpoint},${...checkpoint}} # default play with myself num_agents: ${...num_agents} update_win_rate: 0.8 player_pool_length: 4 games_to_check: 400 max_update_steps: 5000 device: ${...rl_device} config: name: Humanoid env_name: rlgpu multi_gpu: False ppo: True mixed_precision: False normalize_input: True normalize_value: True num_actors: ${....task.env.numEnvs} reward_shaper: scale_value: 1 normalize_advantage: True gamma: 0.99 tau: 0.95 learning_rate: 2e-5 lr_schedule: constant score_to_win: 20000000 max_epochs: ${resolve_default:100000,${....max_iterations}} save_best_after: 10 save_frequency: 50 print_stats: True grad_norm: 1.0 entropy_coef: 0.0 truncate_grads: False e_clip: 0.2 horizon_length: 64 minibatch_size: ${resolve_default:64,${....minibatch_size}} mini_epochs: 6 critic_coef: 5 clip_value: False seq_len: 4 bounds_loss_coef: 10 task_reward_w: 0.9 disc_reward_w: 0.1 player: determenistic: False games_num: 4000 record_elo: True init_elo: 400 llc_steps: 5 llc_config: cfg/train/base/ase_humanoid_hrl.yaml llc_checkpoint: tasks/data/models/llc_reallusion_sword_shield.pth ================================================ FILE: timechamber/cfg/train/base/ase_humanoid_hrl.yaml ================================================ params: seed: -1 algo: name: ase model: name: ase network: name: ase separate: True space: continuous: mu_activation: None sigma_activation: None mu_init: name: default sigma_init: name: const_initializer val: -2.9 fixed_sigma: True learn_sigma: False mlp: units: [1024, 1024, 512] activation: relu d2rl: False initializer: name: default regularizer: name: None disc: units: [1024, 1024, 512] 
activation: relu initializer: name: default enc: units: [1024, 512] activation: relu separate: False initializer: name: default load_checkpoint: False config: name: Humanoid env_name: rlgpu multi_gpu: False ppo: True mixed_precision: False normalize_input: True normalize_value: True reward_shaper: scale_value: 1 normalize_advantage: True gamma: 0.99 tau: 0.95 learning_rate: 2e-5 lr_schedule: constant score_to_win: 20000 max_epochs: 100000 save_best_after: 50 save_frequency: 50 print_stats: True grad_norm: 1.0 entropy_coef: 0.0 truncate_grads: False ppo: True e_clip: 0.2 horizon_length: 32 minibatch_size: 1 mini_epochs: 6 critic_coef: 5 clip_value: False seq_len: 4 bounds_loss_coef: 10 amp_obs_demo_buffer_size: 200000 amp_replay_buffer_size: 200000 amp_replay_keep_prob: 0.01 amp_batch_size: 32 amp_minibatch_size: 1 disc_coef: 5 disc_logit_reg: 0.01 disc_grad_penalty: 5 disc_reward_scale: 2 disc_weight_decay: 0.0001 normalize_amp_input: True enable_eps_greedy: False latent_dim: 64 latent_steps_min: 1 latent_steps_max: 150 amp_latent_grad_bonus: 0.00 amp_latent_grad_bonus_max: 100.0 amp_diversity_bonus: 0.01 amp_diversity_tar: 1.0 enc_coef: 5 enc_weight_decay: 0.0000 enc_reward_scale: 1 enc_grad_penalty: 0 task_reward_w: 0.0 disc_reward_w: 0.5 enc_reward_w: 0.5 ================================================ FILE: timechamber/learning/common_agent.py ================================================ # License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE] import copy from datetime import datetime from gym import spaces import numpy as np import os import time import yaml from rl_games.algos_torch import a2c_continuous from rl_games.algos_torch import torch_ext from rl_games.algos_torch import central_value from rl_games.algos_torch.running_mean_std import RunningMeanStd from rl_games.common import a2c_common from rl_games.common import datasets from rl_games.common import schedulers from rl_games.common import vecenv import torch from torch import optim from 
tensorboardX import SummaryWriter class CommonAgent(a2c_continuous.A2CAgent): def __init__(self, base_name, params): a2c_common.A2CBase.__init__(self, base_name, params) config = params['config'] self._load_config_params(config) self.is_discrete = False self._setup_action_space() self.bounds_loss_coef = config.get('bounds_loss_coef', None) self.clip_actions = config.get('clip_actions', True) self.network_path = config.get('network_path', "./runs") self.network_path = os.path.join(self.network_path, self.config['name']) self.network_path = os.path.join(self.network_path, 'nn') net_config = self._build_net_config() self.model = self.network.build(net_config) self.model.to(self.ppo_device) self.states = None self.init_rnn_from_model(self.model) self.last_lr = float(self.last_lr) self.optimizer = optim.Adam(self.model.parameters(), float(self.last_lr), eps=1e-08, weight_decay=self.weight_decay) if self.has_central_value: cv_config = { 'state_shape' : torch_ext.shape_whc_to_cwh(self.state_shape), 'value_size' : self.value_size, 'ppo_device' : self.ppo_device, 'num_agents' : self.num_agents, 'num_steps' : self.horizon_length, 'num_actors' : self.num_actors, 'num_actions' : self.actions_num, 'seq_len' : self.seq_len, 'model' : self.central_value_config['network'], 'config' : self.central_value_config, 'writter' : self.writer, 'multi_gpu' : self.multi_gpu } self.central_value_net = central_value.CentralValueTrain(**cv_config).to(self.ppo_device) self.use_experimental_cv = self.config.get('use_experimental_cv', True) self.algo_observer.after_init(self) return def init_tensors(self): super().init_tensors() self.experience_buffer.tensor_dict['next_obses'] = torch.zeros_like(self.experience_buffer.tensor_dict['obses']) self.experience_buffer.tensor_dict['next_values'] = torch.zeros_like(self.experience_buffer.tensor_dict['values']) self.tensor_list += ['next_obses'] return def train(self): self.init_tensors() self.last_mean_rewards = -100500 start_time = time.time() total_time 
= 0 rep_count = 0 self.frame = 0 self.obs = self.env_reset() self.curr_frames = self.batch_size_envs self.model_output_file = os.path.join(self.network_path, self.config['name']) if self.multi_gpu: self.hvd.setup_algo(self) self._init_train() while True: epoch_num = self.update_epoch() train_info = self.train_epoch() sum_time = train_info['total_time'] total_time += sum_time frame = self.frame if self.multi_gpu: self.hvd.sync_stats(self) if self.rank == 0: scaled_time = sum_time scaled_play_time = train_info['play_time'] curr_frames = self.curr_frames self.frame += curr_frames if self.print_stats: fps_step = curr_frames / scaled_play_time fps_total = curr_frames / scaled_time print(f'fps step: {fps_step:.1f} fps total: {fps_total:.1f}') self.writer.add_scalar('performance/total_fps', curr_frames / scaled_time, frame) self.writer.add_scalar('performance/step_fps', curr_frames / scaled_play_time, frame) self.writer.add_scalar('info/epochs', epoch_num, frame) self._log_train_info(train_info, frame) self.algo_observer.after_print_stats(frame, epoch_num, total_time) if self.game_rewards.current_size > 0: mean_rewards = self.game_rewards.get_mean() mean_lengths = self.game_lengths.get_mean() for i in range(self.value_size): self.writer.add_scalar('rewards/frame'.format(i), mean_rewards[i], frame) self.writer.add_scalar('rewards/iter'.format(i), mean_rewards[i], epoch_num) self.writer.add_scalar('rewards/time'.format(i), mean_rewards[i], total_time) self.writer.add_scalar('episode_lengths/frame', mean_lengths, frame) self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num) if self.has_self_play_config: self.self_play_manager.update(self) if self.save_freq > 0: if (epoch_num % self.save_freq == 0): self.save(self.model_output_file + "_" + str(epoch_num)) if epoch_num > self.max_epochs: self.save(self.model_output_file) print('MAX EPOCHS NUM!') return self.last_mean_rewards, epoch_num update_time = 0 return def train_epoch(self): play_time_start = time.time() 
with torch.no_grad(): if self.is_rnn: batch_dict = self.play_steps_rnn() else: batch_dict = self.play_steps() play_time_end = time.time() update_time_start = time.time() rnn_masks = batch_dict.get('rnn_masks', None) self.set_train() self.curr_frames = batch_dict.pop('played_frames') self.prepare_dataset(batch_dict) self.algo_observer.after_steps() if self.has_central_value: self.train_central_value() train_info = None if self.is_rnn: frames_mask_ratio = rnn_masks.sum().item() / (rnn_masks.nelement()) print(frames_mask_ratio) for _ in range(0, self.mini_epochs_num): ep_kls = [] for i in range(len(self.dataset)): curr_train_info = self.train_actor_critic(self.dataset[i]) print(type(curr_train_info)) if self.schedule_type == 'legacy': if self.multi_gpu: curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls') self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, curr_train_info['kl'].item()) self.update_lr(self.last_lr) if (train_info is None): train_info = dict() for k, v in curr_train_info.items(): train_info[k] = [v] else: for k, v in curr_train_info.items(): train_info[k].append(v) av_kls = torch_ext.mean_list(train_info['kl']) if self.schedule_type == 'standard': if self.multi_gpu: av_kls = self.hvd.average_value(av_kls, 'ep_kls') self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item()) self.update_lr(self.last_lr) if self.schedule_type == 'standard_epoch': if self.multi_gpu: av_kls = self.hvd.average_value(torch_ext.mean_list(kls), 'ep_kls') self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item()) self.update_lr(self.last_lr) update_time_end = time.time() play_time = play_time_end - play_time_start update_time = update_time_end - update_time_start total_time = update_time_end - play_time_start train_info['play_time'] = play_time train_info['update_time'] = 
update_time train_info['total_time'] = total_time self._record_train_batch_info(batch_dict, train_info) return train_info def play_steps(self): self.set_eval() epinfos = [] update_list = self.update_list for n in range(self.horizon_length): self.obs, done_env_ids = self._env_reset_done() self.experience_buffer.update_data('obses', n, self.obs['obs']) if self.use_action_masks: masks = self.vec_env.get_action_masks() res_dict = self.get_masked_action_values(self.obs, masks) else: res_dict = self.get_action_values(self.obs) for k in update_list: self.experience_buffer.update_data(k, n, res_dict[k]) if self.has_central_value: self.experience_buffer.update_data('states', n, self.obs['states']) self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions']) shaped_rewards = self.rewards_shaper(rewards) self.experience_buffer.update_data('rewards', n, shaped_rewards) self.experience_buffer.update_data('next_obses', n, self.obs['obs']) self.experience_buffer.update_data('dones', n, self.dones) terminated = infos['terminate'].float() terminated = terminated.unsqueeze(-1) next_vals = self._eval_critic(self.obs) next_vals *= (1.0 - terminated) self.experience_buffer.update_data('next_values', n, next_vals) self.current_rewards += rewards self.current_lengths += 1 all_done_indices = self.dones.nonzero(as_tuple=False) done_indices = all_done_indices[::self.num_agents] self.game_rewards.update(self.current_rewards[done_indices]) self.game_lengths.update(self.current_lengths[done_indices]) self.algo_observer.process_infos(infos, done_indices) not_dones = 1.0 - self.dones.float() self.current_rewards = self.current_rewards * not_dones.unsqueeze(1) self.current_lengths = self.current_lengths * not_dones mb_fdones = self.experience_buffer.tensor_dict['dones'].float() mb_values = self.experience_buffer.tensor_dict['values'] mb_next_values = self.experience_buffer.tensor_dict['next_values'] mb_rewards = self.experience_buffer.tensor_dict['rewards'] mb_advs = 
self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values) mb_returns = mb_advs + mb_values batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list) batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns) batch_dict['played_frames'] = self.batch_size return batch_dict def calc_gradients(self, input_dict): self.set_train() value_preds_batch = input_dict['old_values'] old_action_log_probs_batch = input_dict['old_logp_actions'] advantage = input_dict['advantages'] old_mu_batch = input_dict['mu'] old_sigma_batch = input_dict['sigma'] return_batch = input_dict['returns'] actions_batch = input_dict['actions'] obs_batch = input_dict['obs'] obs_batch = self._preproc_obs(obs_batch) lr = self.last_lr kl = 1.0 lr_mul = 1.0 curr_e_clip = lr_mul * self.e_clip batch_dict = { 'is_train': True, 'prev_actions': actions_batch, 'obs' : obs_batch } rnn_masks = None if self.is_rnn: rnn_masks = input_dict['rnn_masks'] batch_dict['rnn_states'] = input_dict['rnn_states'] batch_dict['seq_length'] = self.seq_len with torch.cuda.amp.autocast(enabled=self.mixed_precision): res_dict = self.model(batch_dict) action_log_probs = res_dict['prev_neglogp'] values = res_dict['value'] entropy = res_dict['entropy'] mu = res_dict['mu'] sigma = res_dict['sigma'] a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip) a_loss = a_info['actor_loss'] c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value) c_loss = c_info['critic_loss'] b_loss = self.bound_loss(mu) losses, sum_mask = torch_ext.apply_masks([a_loss.unsqueeze(1), c_loss, entropy.unsqueeze(1), b_loss.unsqueeze(1)], rnn_masks) a_loss, c_loss, entropy, b_loss = losses[0], losses[1], losses[2], losses[3] loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss if self.multi_gpu: self.optimizer.zero_grad() else: for param in self.model.parameters(): 
            # Tail of the gradient-update step (the enclosing method begins
            # before this chunk): zero grads, backprop through the AMP scaler,
            # optionally clip, then step the optimizer.
                param.grad = None
        self.scaler.scale(loss).backward()
        #TODO: Refactor this ugliest code of the year
        if self.truncate_grads:
            if self.multi_gpu:
                # Horovod-style optimizer: sync grads across ranks before
                # unscaling/clipping, and skip the second sync on step().
                self.optimizer.synchronize()
                self.scaler.unscale_(self.optimizer)
                nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
                with self.optimizer.skip_synchronize():
                    self.scaler.step(self.optimizer)
                    self.scaler.update()
            else:
                self.scaler.unscale_(self.optimizer)
                nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
                self.scaler.step(self.optimizer)
                self.scaler.update()
        else:
            self.scaler.step(self.optimizer)
            self.scaler.update()

        with torch.no_grad():
            reduce_kl = not self.is_rnn
            kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl)
            if self.is_rnn:
                kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel()  #/ sum_mask

        # Per-minibatch training stats; a_info/c_info contribute the actor and
        # critic loss entries consumed later by _log_train_info.
        self.train_result = {
            'entropy': entropy,
            'kl': kl_dist,
            'last_lr': self.last_lr,
            'lr_mul': lr_mul,
            'b_loss': b_loss
        }
        self.train_result.update(a_info)
        self.train_result.update(c_info)

        return

    def discount_values(self, mb_fdones, mb_values, mb_rewards, mb_next_values):
        """Compute GAE-style advantages over the rollout (time-major tensors).

        NOTE(review): `delta` does not mask `mb_next_values` with (1 - done);
        presumably next_values are already zeroed at termination by the caller
        (play_steps does `next_vals *= (1.0 - terminated)`) — confirm.
        """
        lastgaelam = 0
        mb_advs = torch.zeros_like(mb_rewards)
        for t in reversed(range(self.horizon_length)):
            not_done = 1.0 - mb_fdones[t]
            not_done = not_done.unsqueeze(1)
            delta = mb_rewards[t] + self.gamma * mb_next_values[t] - mb_values[t]
            lastgaelam = delta + self.gamma * self.tau * not_done * lastgaelam
            mb_advs[t] = lastgaelam
        return mb_advs

    def bound_loss(self, mu):
        """Quadratic penalty pushing the action mean back inside [-1, 1]."""
        if self.bounds_loss_coef is not None:
            soft_bound = 1.0
            mu_loss_high = torch.maximum(mu - soft_bound, torch.tensor(0, device=self.ppo_device))**2
            mu_loss_low = torch.minimum(mu + soft_bound, torch.tensor(0, device=self.ppo_device))**2
            b_loss = (mu_loss_low + mu_loss_high).sum(axis=-1)
        else:
            b_loss = 0
        return b_loss

    def _load_config_params(self, config):
        # Cache the configured learning rate; logged as 'last_lr'.
        self.last_lr = config['learning_rate']
        return

    def _build_net_config(self):
        """Assemble the kwargs dict passed to the network builder."""
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        config = {
            'actions_num' : self.actions_num,
            'input_shape' : obs_shape,
            'num_seqs' : self.num_actors * self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value' : self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        return config

    def _setup_action_space(self):
        # Cache action count and clamp bounds on the PPO device.
        action_space = self.env_info['action_space']
        self.actions_num = action_space.shape[0]

        # todo introduce device instead of cuda()
        self.actions_low = torch.from_numpy(action_space.low.copy()).float().to(self.ppo_device)
        self.actions_high = torch.from_numpy(action_space.high.copy()).float().to(self.ppo_device)
        return

    def _init_train(self):
        # Hook for subclasses; intentionally a no-op here.
        return

    def _env_reset_done(self):
        """Reset only the envs flagged done; returns (obs tensors, reset env ids)."""
        obs, done_env_ids = self.vec_env.reset_done()
        return self.obs_to_tensors(obs), done_env_ids

    def _eval_critic(self, obs_dict):
        """Run just the critic head on (optionally normalized) observations."""
        self.model.eval()
        obs = obs_dict['obs']
        processed_obs = self._preproc_obs(obs)
        if self.normalize_input:
            processed_obs = self.model.norm_obs(processed_obs)
        value = self.model.a2c_network.eval_critic(processed_obs)
        if self.normalize_value:
            # unnorm=True: map normalized critic output back to reward scale.
            value = self.value_mean_std(value, True)
        return value

    def _actor_loss(self, old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip):
        """PPO clipped surrogate loss (or plain PG when self.ppo is false).

        Returns a dict with per-sample 'actor_loss' and scalar
        'actor_clip_frac' (None in the non-PPO branch).
        """
        clip_frac = None
        if (self.ppo):
            ratio = torch.exp(old_action_log_probs_batch - action_log_probs)
            surr1 = advantage * ratio
            surr2 = advantage * torch.clamp(ratio, 1.0 - curr_e_clip, 1.0 + curr_e_clip)
            a_loss = torch.max(-surr1, -surr2)

            # Fraction of samples whose ratio left the trust region.
            clipped = torch.abs(ratio - 1.0) > curr_e_clip
            clip_frac = torch.mean(clipped.float())
            clip_frac = clip_frac.detach()
        else:
            a_loss = (action_log_probs * advantage)

        info = {
            'actor_loss': a_loss,
            'actor_clip_frac': clip_frac
        }
        return info

    def _critic_loss(self, value_preds_batch, values, curr_e_clip, return_batch, clip_value):
        """Squared-error value loss, optionally PPO-clipped around old preds."""
        if clip_value:
            value_pred_clipped = value_preds_batch + \
                (values - value_preds_batch).clamp(-curr_e_clip, curr_e_clip)
            value_losses = (values - return_batch)**2
            value_losses_clipped = (value_pred_clipped - return_batch)**2
            c_loss = torch.max(value_losses, value_losses_clipped)
        else:
            c_loss = (return_batch - values)**2

        info = {
            'critic_loss': c_loss
        }
        return info

    def _record_train_batch_info(self, batch_dict, train_info):
        # Hook for subclasses; intentionally a no-op here.
        return

    def _log_train_info(self, train_info, frame):
        """Write per-epoch training metrics to TensorBoard."""
        self.writer.add_scalar('performance/update_time', train_info['update_time'], frame)
        self.writer.add_scalar('performance/play_time', train_info['play_time'], frame)
        self.writer.add_scalar('losses/a_loss', torch_ext.mean_list(train_info['actor_loss']).item(), frame)
        self.writer.add_scalar('losses/c_loss', torch_ext.mean_list(train_info['critic_loss']).item(), frame)

        self.writer.add_scalar('losses/bounds_loss', torch_ext.mean_list(train_info['b_loss']).item(), frame)
        self.writer.add_scalar('losses/entropy', torch_ext.mean_list(train_info['entropy']).item(), frame)
        self.writer.add_scalar('info/last_lr', train_info['last_lr'][-1] * train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/lr_mul', train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/e_clip', self.e_clip * train_info['lr_mul'][-1], frame)
        self.writer.add_scalar('info/clip_frac', torch_ext.mean_list(train_info['actor_clip_frac']).item(), frame)
        self.writer.add_scalar('info/kl', torch_ext.mean_list(train_info['kl']).item(), frame)
        return


================================================
FILE: timechamber/learning/common_player.py
================================================
# License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE]

import torch

from rl_games.algos_torch import players
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch.running_mean_std import RunningMeanStd
from rl_games.common.player import BasePlayer


class CommonPlayer(players.PpoPlayerContinuous):
    """Evaluation player that builds its own network and supports
    reset-done-style vectorized environments."""

    def __init__(self, params):
        # Deliberately skips PpoPlayerContinuous.__init__ and calls
        # BasePlayer directly, then builds the net itself.
        BasePlayer.__init__(self, params)
        self.network = self.config['network']
        self.normalize_input = self.config['normalize_input']
        self.normalize_value = self.config['normalize_value']

        self._setup_action_space()
        self.mask = [False]

        net_config = self._build_net_config()
        self._build_net(net_config)

        return

    def run(self):
        """Play `games_num * n_game_life` games and print reward statistics."""
        n_games = self.games_num
        render = self.render_env
        n_game_life = self.n_game_life
        is_determenistic = self.is_determenistic
        sum_rewards = 0
        sum_steps = 0
        sum_game_res = 0
        n_games = n_games * n_game_life
        games_played = 0
        has_masks = False
        has_masks_func = getattr(self.env, "has_action_mask", None) is not None

        # NOTE(review): `agent_inited` is assigned but never read.
        op_agent = getattr(self.env, "create_agent", None)
        if op_agent:
            agent_inited = True

        if has_masks_func:
            has_masks = self.env.has_action_mask()

        need_init_rnn = self.is_rnn
        for _ in range(n_games):
            if games_played >= n_games:
                break

            obs_dict = self.env_reset(self.env)
            batch_size = 1
            batch_size = self.get_batch_size(obs_dict['obs'], batch_size)

            if need_init_rnn:
                self.init_rnn()
                need_init_rnn = False

            # Per-env cumulative reward and episode length.
            cr = torch.zeros(batch_size, dtype=torch.float32)
            steps = torch.zeros(batch_size, dtype=torch.float32)

            print_game_res = False

            for n in range(self.max_steps):
                # Reset only envs that finished on the previous step.
                obs_dict, done_env_ids = self._env_reset_done()

                if has_masks:
                    masks = self.env.get_action_mask()
                    action = self.get_masked_action(obs_dict, masks, is_determenistic)
                else:
                    action = self.get_action(obs_dict, is_determenistic)
                obs_dict, r, done, info = self.env_step(self.env, action)
                cr += r
                steps += 1

                self._post_step(info)

                if render:
                    self.env.render(mode = 'human')
                    # NOTE(review): `time` is not imported in this file's
                    # header — this line raises NameError when render=True.
                    time.sleep(self.render_sleep)

                all_done_indices = done.nonzero(as_tuple=False)
                # One logical game spans num_agents env slots.
                done_indices = all_done_indices[::self.num_agents]
                done_count = len(done_indices)
                games_played += done_count

                if done_count > 0:
                    if self.is_rnn:
                        for s in self.states:
                            s[:,all_done_indices,:] = s[:,all_done_indices,:] * 0.0

                    cur_rewards = cr[done_indices].sum().item()
                    cur_steps = steps[done_indices].sum().item()

                    cr = cr * (1.0 - done.float())
                    steps = steps * (1.0 - done.float())
                    sum_rewards += cur_rewards
                    sum_steps += cur_steps

                    game_res = 0.0
                    if isinstance(info, dict):
                        if 'battle_won' in info:
                            print_game_res = True
                            game_res = info.get('battle_won', 0.5)
                        if 'scores' in info:
                            print_game_res = True
                            game_res = info.get('scores', 0.5)
                    if self.print_stats:
                        if print_game_res:
                            print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count, 'w:', game_res)
                        else:
                            print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count)

                    sum_game_res += game_res
                    if batch_size//self.num_agents == 1 or games_played >= n_games:
                        break

        print(sum_rewards)
        if print_game_res:
            print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life, 'winrate:', sum_game_res / games_played * n_game_life)
        else:
            print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life)

        return

    def obs_to_torch(self, obs):
        # Wrap the base conversion result in the {'obs': ...} dict shape
        # this player uses everywhere.
        obs = super().obs_to_torch(obs)
        obs_dict = {
            'obs': obs
        }
        return obs_dict

    def get_action(self, obs_dict, is_determenistic = False):
        # Unwrap the dict before delegating to the base implementation.
        output = super().get_action(obs_dict['obs'], is_determenistic)
        return output

    def _build_net(self, config):
        """Build the policy network and put it in eval mode."""
        self.model = self.network.build(config)
        self.model.to(self.device)
        self.model.eval()
        self.is_rnn = self.model.is_rnn()
        return

    def _env_reset_done(self):
        """Reset only the envs flagged done; returns (obs dict, reset env ids)."""
        obs, done_env_ids = self.env.reset_done()
        return self.obs_to_torch(obs), done_env_ids

    def _post_step(self, info):
        # Hook for subclasses; intentionally a no-op here.
        return

    def _build_net_config(self):
        """Assemble the kwargs dict passed to the network builder."""
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        config = {
            'actions_num' : self.actions_num,
            'input_shape' : obs_shape,
            'num_seqs' : self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        return config

    def _setup_action_space(self):
        # Cache action count and clamp bounds on the player device.
        self.actions_num = self.action_space.shape[0]
        self.actions_low = torch.from_numpy(self.action_space.low.copy()).float().to(self.device)
        self.actions_high = torch.from_numpy(self.action_space.high.copy()).float().to(self.device)
        return


================================================
FILE: timechamber/learning/hrl_sp_agent.py
================================================
import copy
from collections import OrderedDict
from datetime import datetime
from gym import spaces
import numpy as np
import os
import time

from .pfsp_player_pool import PFSPPlayerPool, SinglePlayer, PFSPPlayerThreadPool, PFSPPlayerProcessPool, \
    PFSPPlayerVectorizedPool
from rl_games.common.a2c_common import swap_and_flatten01
from rl_games.algos_torch import torch_ext
from rl_games.algos_torch import central_value
from isaacgym.torch_utils import *

import torch
from torch import optim
from tensorboardX import SummaryWriter
import torch.distributed as dist

import timechamber.ase.hrl_agent as hrl_agent
from timechamber.utils.utils import load_check, load_checkpoint


class HRLSPAgent(hrl_agent.HRLAgent):
    """Hierarchical RL agent trained by self-play against a PFSP pool of
    frozen past opponents."""

    def __init__(self, base_name, params):
        params['config']['device'] = params['device']
        super().__init__(base_name, params)
        self.player_pool_type = params['player_pool_type']
        # Config used to instantiate opponent copies of the policy network.
        self.base_model_config = {
            'actions_num': self.actions_num,
            'input_shape': self.obs_shape,
            'num_seqs': self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        self.max_his_player_num = params['player_pool_length']
        # Either start the opponent from a checkpoint, or from the live model.
        if params['op_load_path']:
            self.init_op_model = self.create_model()
            self.restore_op(params['op_load_path'])
        else:
            self.init_op_model = self.model
        self.players_dir = os.path.join(self.experiment_dir, 'policy_dir')
        os.makedirs(self.players_dir, exist_ok=True)
        self.update_win_rate = params['update_win_rate']
        self.num_opponent_agents = params['num_agents'] - 1
        self.player_pool = self._build_player_pool(params)

        self.games_to_check = params['games_to_check']
        self.now_update_steps = 0
        self.max_update_steps = params['max_update_steps']
        self.update_op_num = 0

        # Seed the pool with the initial opponent and assign every env to it.
        self.update_player_pool(self.init_op_model, player_idx=self.update_op_num)
        self.resample_op(torch.arange(end=self.num_actors, device=self.device, dtype=torch.long))
        assert self.num_actors % self.max_his_player_num == 0

    def _build_player_pool(self, params):
        """Create the opponent pool; vectorized pools batch all opponents
        through one stacked model."""
        if self.player_pool_type == 'vectorized':
            vector_model_config = self.base_model_config
            vector_model_config['num_envs'] = self.num_actors * self.num_opponent_agents
            vector_model_config['population_size'] = self.max_his_player_num
            return PFSPPlayerVectorizedPool(max_length=self.max_his_player_num, device=self.device,
                                            vector_model_config=vector_model_config, params=params)
        else:
            return PFSPPlayerPool(max_length=self.max_his_player_num, device=self.device)

    def play_steps(self):
        """Collect one horizon of experience, stepping ego and opponent
        policies jointly, and return the flattened batch dict."""
        self.set_eval()
        env_done_indices = torch.tensor([], device=self.device, dtype=torch.long)
        update_list = self.update_list
        step_time = 0.0
        for n in range(self.horizon_length):
            # Reset only the envs that finished last step.
            self.obs = self.env_reset(env_done_indices)
            self.experience_buffer.update_data('obses', n, self.obs['obs'])

            if self.use_action_masks:
                masks = self.vec_env.get_action_masks()
                res_dict = self.get_masked_action_values(self.obs, masks)
            else:
                # NOTE(review): `res_dict_op` is only defined on this branch,
                # but is used unconditionally below — the masked path would
                # raise NameError. Confirm use_action_masks is always False here.
                res_dict_op = self.get_action_values(self.obs, is_op=True)
                res_dict = self.get_action_values(self.obs)
            for k in update_list:
                self.experience_buffer.update_data(k, n, res_dict[k])
            if self.has_central_value:
                self.experience_buffer.update_data('states', n, self.obs['states'])
            if self.player_pool_type == 'multi_thread':
                self.player_pool.thread_pool.shutdown()
            step_time_start = time.time()
            self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'], res_dict_op['actions'])
            step_time_end = time.time()
            step_time += (step_time_end - step_time_start)
            shaped_rewards = self.rewards_shaper(rewards)
            if self.value_bootstrap and 'time_outs' in infos:
                # Bootstrap value at timeout terminations.
                shaped_rewards += self.gamma * res_dict['values'] * self.cast_obs(infos['time_outs']).unsqueeze(
                    1).float()
            self.experience_buffer.update_data('rewards', n, shaped_rewards)
            self.experience_buffer.update_data('next_obses', n, self.obs['obs'])
            self.experience_buffer.update_data('dones', n, self.dones)
            self.experience_buffer.update_data('disc_rewards', n, infos['disc_rewards'])
            terminated = infos['terminate'].float()
            terminated = terminated.unsqueeze(-1)
            # Next-state values, zeroed at true terminations (used by
            # discount_values, which does not mask them again).
            next_vals = self._eval_critic(self.obs)
            next_vals *= (1.0 - terminated)
            self.experience_buffer.update_data('next_values', n, next_vals)

            self.current_rewards += rewards
            self.current_lengths += 1
            all_done_indices = self.dones.nonzero(as_tuple=False)
            # A game counts as done only when all agent slots of the env are done.
            env_done_indices = self.dones.view(self.num_actors, self.num_agents).all(dim=1).nonzero(as_tuple=False)
            self.game_rewards.update(self.current_rewards[env_done_indices])
            self.game_lengths.update(self.current_lengths[env_done_indices])
            self.algo_observer.process_infos(infos, env_done_indices)
            not_dones = 1.0 - self.dones.float()
            self.current_rewards = self.current_rewards * not_dones.unsqueeze(1)
            self.current_lengths = self.current_lengths * not_dones
            # Record win/lose/draw per opponent, then re-draw opponents for
            # the envs that just finished.
            self.player_pool.update_player_metric(infos=infos)
            self.resample_op(all_done_indices.flatten())
            env_done_indices = env_done_indices[:, 0]

        last_values = self.get_values(self.obs)
        mb_fdones = self.experience_buffer.tensor_dict['dones'].float()
        mb_values = self.experience_buffer.tensor_dict['values']
        mb_next_values = self.experience_buffer.tensor_dict['next_values']
        mb_rewards = self.experience_buffer.tensor_dict['rewards']
        mb_disc_rewards = self.experience_buffer.tensor_dict['disc_rewards']
        # Blend task reward with the AMP discriminator reward.
        mb_rewards = self._combine_rewards(mb_rewards, mb_disc_rewards)
        mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values)
        mb_returns = mb_advs + mb_values
        batch_dict = self.experience_buffer.get_transformed_list(swap_and_flatten01, self.tensor_list)
        batch_dict['returns'] = swap_and_flatten01(mb_returns)
        batch_dict['played_frames'] = self.batch_size
        batch_dict['step_time'] = step_time
        return batch_dict

    def env_step(self, ego_actions, op_actions):
        """Advance the env by one high-level step (= `_llc_steps` low-level
        controller steps), averaging rewards and OR-ing event flags."""
        ego_actions = self.preprocess_actions(ego_actions)
        op_actions = self.preprocess_actions(op_actions)
        obs = self.obs['obs']
        obs_op = self.obs['obs_op']
        rewards = 0.0
        disc_rewards = 0.0
        done_count = 0.0
        terminate_count = 0.0
        win_count = 0.0
        lose_count = 0.0
        draw_count = 0.0
        for t in range(self._llc_steps):
            llc_ego_actions = self._compute_llc_action(obs, ego_actions)
            llc_op_actions = self._compute_llc_action(obs_op, op_actions)
            # Ego envs occupy the first num_actors slots, opponents the rest.
            llc_actions = torch.cat((llc_ego_actions, llc_op_actions), dim=0)
            obs_dict, curr_rewards, curr_dones, infos = self.vec_env.step(llc_actions)
            rewards += curr_rewards
            done_count += curr_dones
            terminate_count += infos['terminate']
            win_count += infos['win']
            lose_count += infos['lose']
            draw_count += infos['draw']
            amp_obs = infos['amp_obs']
            curr_disc_reward = self._calc_disc_reward(amp_obs)
            disc_rewards += curr_disc_reward
            obs = obs_dict['obs'][:self.num_actors]
            obs_op = obs_dict['obs'][self.num_actors:]
        rewards /= self._llc_steps
        disc_rewards /= self._llc_steps
        # A flag is set for the macro step if it fired on any LLC sub-step.
        dones = torch.zeros_like(done_count)
        dones[done_count > 0] = 1.0
        terminate = torch.zeros_like(terminate_count)
        terminate[terminate_count > 0] = 1.0
        infos['terminate'] = terminate
        infos['disc_rewards'] = disc_rewards
        wins = torch.zeros_like(win_count)
        wins[win_count > 0] = 1.0
        infos['win'] = wins
        loses = torch.zeros_like(lose_count)
        loses[lose_count > 0] = 1.0
        infos['lose'] = loses
        draws = torch.zeros_like(draw_count)
        draws[draw_count > 0] = 1.0
        infos['draw'] = draws
        obs_dict = {}
        obs_dict['obs'] = obs
        obs_dict['obs_op'] = obs_op
        if self.is_tensor_obses:
            if self.value_size == 1:
                rewards = rewards.unsqueeze(1)
            return self.obs_to_tensors(obs_dict), rewards.to(self.ppo_device), dones.to(self.ppo_device), infos
        else:
            if self.value_size == 1:
                rewards = np.expand_dims(rewards, axis=1)
            return self.obs_to_tensors(obs_dict), torch.from_numpy(rewards).to(self.ppo_device).float(), torch.from_numpy(dones).to(self.ppo_device), infos

    def env_reset(self, env_ids=None):
        """Reset (a subset of) envs and split the stacked obs into ego
        ('obs') and opponent ('obs_op') halves."""
        obs = self.vec_env.reset(env_ids)
        obs = self.obs_to_tensors(obs)
        # Order matters: slice the opponent half before overwriting 'obs'.
        obs['obs_op'] = obs['obs'][self.num_actors:]
        obs['obs'] = obs['obs'][:self.num_actors]
        return obs

    def train(self):
        """Main training loop: epochs of train_epoch() plus logging,
        checkpointing, opponent-pool metric updates and exit handling."""
        self.init_tensors()
        self.mean_rewards = self.last_mean_rewards = -100500
        start_time = time.time()
        total_time = 0
        rep_count = 0
        # self.frame = 0  # loading from checkpoint
        self.obs = self.env_reset()

        if self.multi_gpu:
            torch.cuda.set_device(self.rank)
            print("====================broadcasting parameters")
            model_params = [self.model.state_dict()]
            dist.broadcast_object_list(model_params, 0)
            self.model.load_state_dict(model_params[0])

        self._init_train()

        while True:
            epoch_num = self.update_epoch()
            train_info = self.train_epoch()
            print(f"epoch num: {epoch_num}")
            sum_time = train_info['total_time']
            step_time = train_info['step_time']
            play_time = train_info['play_time']
            update_time = train_info['update_time']
            a_losses = train_info['actor_loss']
            c_losses = train_info['critic_loss']
            entropies = train_info['entropy']
            kls = train_info['kl']
            last_lr = train_info['last_lr'][-1]
            lr_mul = train_info['lr_mul'][-1]

            # cleaning memory to optimize space
            self.dataset.update_values_dict(None)
            total_time += sum_time
            curr_frames = self.curr_frames * self.rank_size if self.multi_gpu else self.curr_frames
            self.frame += curr_frames
            should_exit = False

            # Only rank 0 logs, saves and decides on pool updates.
            if self.rank == 0:
                self.diagnostics.epoch(self, current_epoch=epoch_num)
                scaled_time = self.num_agents * sum_time
                scaled_play_time = self.num_agents * play_time

                frame = self.frame // self.num_agents

                if self.print_stats:
                    step_time = max(step_time, 1e-6)
                    fps_step = curr_frames / step_time
                    fps_step_inference = curr_frames / scaled_play_time
                    fps_total = curr_frames / scaled_time
                    print(
                        f'fps step: {fps_step:.0f} fps step and policy inference: {fps_step_inference:.0f} fps total: {fps_total:.0f} epoch: {epoch_num}/{self.max_epochs}')

                self.write_stats(total_time, epoch_num, step_time, play_time, update_time, a_losses, c_losses,
                                 entropies, kls, last_lr, lr_mul, frame, scaled_time, scaled_play_time, curr_frames)

                self.algo_observer.after_print_stats(frame, epoch_num, total_time)

                if self.game_rewards.current_size > 0:
                    mean_rewards = self.game_rewards.get_mean()
                    mean_lengths = self.game_lengths.get_mean()
                    self.mean_rewards = mean_rewards[0]

                    for i in range(self.value_size):
                        rewards_name = 'rewards' if i == 0 else 'rewards{0}'.format(i)
                        self.writer.add_scalar(rewards_name + '/step'.format(i), mean_rewards[i], frame)
                        self.writer.add_scalar(rewards_name + '/iter'.format(i), mean_rewards[i], epoch_num)
                        self.writer.add_scalar(rewards_name + '/time'.format(i), mean_rewards[i], total_time)

                    self.writer.add_scalar('episode_lengths/step', mean_lengths, frame)
                    self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num)
                    self.writer.add_scalar('episode_lengths/time', mean_lengths, total_time)

                    # removed equal signs (i.e. "rew=") from the checkpoint name since it messes with hydra CLI parsing
                    checkpoint_name = self.config['name'] + '_ep_' + str(epoch_num) + '_rew_' + str(mean_rewards[0])

                    if self.save_freq > 0:
                        # NOTE(review): saving 'last_' only when rewards did NOT
                        # improve (<=) looks inverted vs. the usual rl_games
                        # periodic save — confirm intent.
                        if (epoch_num % self.save_freq == 0) and (mean_rewards <= self.last_mean_rewards):
                            self.save(os.path.join(self.nn_dir, 'last_' + checkpoint_name))

                    if mean_rewards[0] > self.last_mean_rewards and epoch_num >= self.save_best_after:
                        print('saving next best rewards: ', mean_rewards)
                        self.last_mean_rewards = mean_rewards[0]
                        self.save(os.path.join(self.nn_dir, self.config['name']))

                        if 'score_to_win' in self.config:
                            if self.last_mean_rewards > self.config['score_to_win']:
                                print('Network won!')
                                self.save(os.path.join(self.nn_dir, checkpoint_name))
                                should_exit = True

                if epoch_num >= self.max_epochs:
                    if self.game_rewards.current_size == 0:
                        print('WARNING: Max epochs reached before any env terminated at least once')
                        mean_rewards = -np.inf
                    self.save(os.path.join(self.nn_dir,
                                           'last_' + self.config['name'] + 'ep' + str(epoch_num) + 'rew' + str(
                                               mean_rewards)))
                    print('MAX EPOCHS NUM!')
                    should_exit = True
                self.update_metric()
                update_time = 0

            if self.multi_gpu:
                should_exit_t = torch.tensor(should_exit, device=self.device).float()
                dist.broadcast(should_exit_t, 0)
                should_exit = should_exit_t.bool().item()
            if should_exit:
                return self.last_mean_rewards, epoch_num

    def update_metric(self):
        """Aggregate per-opponent win rates and maybe snapshot a new opponent."""
        tot_win_rate = 0
        tot_games_num = 0
        self.now_update_steps += 1
        # self_player process
        for player in self.player_pool.players:
            win_rate = player.win_rate()
            games = player.games_num()
            self.writer.add_scalar(f'rate/win_rate_player_{player.player_idx}', win_rate, self.epoch_num)
            tot_win_rate += win_rate * games
            tot_games_num += games
        # NOTE(review): tot_games_num can be 0 early in training (decayed /
        # cleared metrics) which would make this divide raise — confirm.
        win_rate = tot_win_rate / tot_games_num
        if tot_games_num > self.games_to_check:
            self.check_update_opponent(win_rate)
        self.writer.add_scalar('rate/win_rate', win_rate, self.epoch_num)

    def get_action_values(self, obs, is_op=False):
        """Run inference: the live model for ego, the opponent pool for op.

        Opponent results are scatter-written into preallocated zero tensors
        by the pool, one slice per assigned env."""
        processed_obs = self._preproc_obs(obs['obs_op'] if is_op else obs['obs'])
        if not is_op:
            self.model.eval()
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs': processed_obs,
            'rnn_states': self.rnn_states
        }

        with torch.no_grad():
            if is_op:
                res_dict = {
                    "actions": torch.zeros((self.num_actors * self.num_opponent_agents, self.actions_num),
                                           device=self.device),
                    "values": torch.zeros((self.num_actors * self.num_opponent_agents, 1), device=self.device)
                }
                self.player_pool.inference(input_dict, res_dict, processed_obs)
            else:
                res_dict = self.model(input_dict)
                if self.has_central_value:
                    states = obs['states']
                    input_dict = {
                        'is_train': False,
                        'states': states,
                    }
                    value = self.get_central_value(input_dict)
                    res_dict['values'] = value
        return res_dict

    def restore(self, fn):
        """Load full training state from checkpoint `fn`."""
        checkpoint = load_checkpoint(fn, device=self.device)
        checkpoint = load_check(checkpoint=checkpoint, normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        self.set_full_state_weights(checkpoint)

    def resample_op(self, resample_indices):
        """Re-draw an opponent (PFSP-weighted) for every env in
        `resample_indices`, for each opponent slot."""
        for op_idx in range(self.num_opponent_agents):
            for player in self.player_pool.players:
                player.remove_envs(resample_indices + op_idx * self.num_actors)
        for op_idx in range(self.num_opponent_agents):
            for env_idx in resample_indices:
                player = self.player_pool.sample_player()
                player.add_envs(env_idx + op_idx * self.num_actors)
        # Rebuild each player's cached env index list.
        for player in self.player_pool.players:
            player.reset_envs()

    def resample_batch(self):
        """Reassign opponents to envs in contiguous chunks of num_actors/32."""
        env_indices = torch.arange(end=self.num_actors * self.num_opponent_agents, device=self.device,
                                   dtype=torch.long, requires_grad=False)
        step = self.num_actors // 32
        for player in self.player_pool.players:
            player.clear_envs()
        for i in range(0, self.num_actors, step):
            player = self.player_pool.sample_player()
            player.add_envs(env_indices[i:i + step])
        print("resample done")

    def restore_op(self, fn):
        """Load only model weights (and obs normalizer) into the initial
        opponent model."""
        checkpoint = load_checkpoint(fn, device=self.device)
        checkpoint = load_check(checkpoint, normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        self.init_op_model.load_state_dict(checkpoint['model'])
        if self.normalize_input and 'running_mean_std' in checkpoint:
            self.init_op_model.running_mean_std.load_state_dict(checkpoint['running_mean_std'])

    def check_update_opponent(self, win_rate):
        """Snapshot the current policy into the pool when we beat the pool
        often enough, or after max_update_steps epochs without an update."""
        if win_rate > self.update_win_rate or self.now_update_steps > self.max_update_steps:
            print(f'winrate:{win_rate},add opponent to player pool')
            self.update_op_num += 1
            self.now_update_steps = 0
            self.update_player_pool(self.model, player_idx=self.update_op_num)
            self.player_pool.clear_player_metric()
            self.resample_op(torch.arange(end=self.num_actors, device=self.device, dtype=torch.long))
            self.save(os.path.join(self.players_dir, f'policy_{self.update_op_num}'))

    def create_model(self):
        """Build a fresh policy network on the agent device."""
        model = self.network.build(self.base_model_config)
        model.to(self.device)
        return model

    def update_player_pool(self, model, player_idx):
        """Deep-copy `model` into a frozen SinglePlayer and add it to the pool."""
        new_model = self.create_model()
        new_model.load_state_dict(copy.deepcopy(model.state_dict()))
        if hasattr(model, 'running_mean_std'):
            new_model.running_mean_std.load_state_dict(copy.deepcopy(model.running_mean_std.state_dict()))
        player = SinglePlayer(player_idx, new_model, self.device, self.num_actors * self.num_opponent_agents)
        self.player_pool.add_player(player)


================================================
FILE: timechamber/learning/hrl_sp_player.py
================================================
# License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE]

import os
import time
import torch
import numpy as np
from rl_games.algos_torch import players
import random
from rl_games.algos_torch import torch_ext
from rl_games.common.tr_helpers import unsqueeze_obs
from timechamber.ase import hrl_players
from timechamber.utils.utils import load_check, load_checkpoint
from .pfsp_player_pool import PFSPPlayerPool, PFSPPlayerVectorizedPool, PFSPPlayerThreadPool, PFSPPlayerProcessPool, \
    SinglePlayer
import matplotlib.pyplot as plt
from multielo import MultiElo


class HRLSPPlayer(hrl_players.HRLPlayer):
    """Evaluation player that pits checkpoint pools against each other and
    optionally tracks multi-player Elo ratings."""

    def __init__(self, params):
        params['config']['device_name'] = params['device']
        super().__init__(params)
        print(f'params:{params}')
        self.network = self.config['network']
        self.mask = [False]
        self.is_rnn = False
        self.normalize_input = self.config['normalize_input']
        self.normalize_value = self.config.get('normalize_value', False)
        # Config used to instantiate each loaded checkpoint's network.
        self.base_model_config = {
            'actions_num': self.actions_num,
            'input_shape': self.obs_shape,
            'num_seqs': self.num_agents,
            'value_size': self.env_info.get('value_size', 1),
            'normalize_value': self.normalize_value,
            'normalize_input': self.normalize_input,
        }
        # Checkpoint mtimes, later normalized to days since the first one.
        self.policy_timestep = []
        self.policy_op_timestep = []
        self.params = params
        self.record_elo = self.player_config.get('record_elo', False)
        self.init_elo = self.player_config.get('init_elo', 400)
        self.num_actors = params['config']['num_actors']
        self.player_pool_type = params['player_pool_type']
        self.player_pool = None
        self.op_player_pool = None
        self.num_opponents = params['num_agents'] - 1
        self.max_steps = 1000
        self.update_op_num = 0
        # players_per_env[i] = [ego player, opponent players...] for env i.
        self.players_per_env = []
        self.elo = MultiElo()

    def restore(self, load_dir):
        """Load the ego pool from a checkpoint file or a directory of
        checkpoints (sorted by file mtime), then the opponent pool."""
        if os.path.isdir(load_dir):
            self.player_pool = self._build_player_pool(params=self.params, player_num=len(os.listdir(load_dir)))
            print('dir:', load_dir)
            sorted_players = []
            for idx, policy_check_checkpoint in enumerate(os.listdir(load_dir)):
                model_timestep = os.path.getmtime(load_dir + '/' + str(policy_check_checkpoint))
                self.policy_timestep.append(model_timestep)
                model = self.load_model(load_dir + '/' + str(policy_check_checkpoint))
                # player_idx temporarily holds the mtime for sorting below.
                new_player = SinglePlayer(player_idx=model_timestep, model=model, device=self.device,
                                          rating=self.init_elo,
                                          obs_batch_len=self.num_actors * self.num_opponents)
                sorted_players.append(new_player)
            sorted_players.sort(key=lambda player: player.player_idx)
            for idx, player in enumerate(sorted_players):
                player.player_idx = idx
                self.player_pool.add_player(player)
            self.policy_timestep.sort()
        else:
            self.player_pool = self._build_player_pool(params=self.params, player_num=1)
            self.policy_timestep.append(os.path.getmtime(load_dir))
            model = self.load_model(load_dir)
            new_player = SinglePlayer(player_idx=0, model=model, device=self.device, rating=self.init_elo,
                                      obs_batch_len=self.num_actors * self.num_opponents)
            self.player_pool.add_player(new_player)
        self.restore_op(self.params['op_load_path'])
        self._norm_policy_timestep()
        self._alloc_env_indices()

    def restore_op(self, load_dir):
        """Load the opponent pool; mirrors restore() for the op side."""
        if os.path.isdir(load_dir):
            self.op_player_pool = self._build_player_pool(params=self.params, player_num=len(os.listdir(load_dir)))
            sorted_players = []
            for idx, policy_check_checkpoint in enumerate(os.listdir(load_dir)):
                model_timestep = os.path.getmtime(load_dir + '/' + str(policy_check_checkpoint))
                self.policy_op_timestep.append(model_timestep)
                model = self.load_model(load_dir + '/' + str(policy_check_checkpoint))
                new_player = SinglePlayer(player_idx=model_timestep, model=model, device=self.device,
                                          rating=self.init_elo,
                                          obs_batch_len=self.num_actors * self.num_opponents)
                sorted_players.append(new_player)
            sorted_players.sort(key=lambda player: player.player_idx)
            for idx, player in enumerate(sorted_players):
                player.player_idx = idx
                self.op_player_pool.add_player(player)
            self.policy_op_timestep.sort()
        else:
            self.op_player_pool = self._build_player_pool(params=self.params, player_num=1)
            self.policy_op_timestep.append(os.path.getmtime(load_dir))
            model = self.load_model(load_dir)
            # NOTE(review): hard-coded 400 here instead of self.init_elo,
            # unlike every other branch — confirm intent.
            new_player = SinglePlayer(player_idx=0, model=model, device=self.device, rating=400,
                                      obs_batch_len=self.num_actors * self.num_opponents)
            self.op_player_pool.add_player(new_player)

    def _alloc_env_indices(self):
        """Randomly assign an ego player and opponent players to every env,
        recording the pairing in players_per_env."""
        for idx in range(self.num_actors):
            player_idx = random.randint(0, len(self.player_pool.players) - 1)
            self.player_pool.players[player_idx].add_envs(torch.tensor([idx], dtype=torch.long, device=self.device))
            env_player = [self.player_pool.players[player_idx]]
            for op_idx in range(self.num_opponents):
                op_player_idx = random.randint(0, len(self.op_player_pool.players) - 1)
                self.op_player_pool.players[op_player_idx].add_envs(
                    torch.tensor([idx + op_idx * self.num_actors], dtype=torch.long, device=self.device))
                env_player.append(self.op_player_pool.players[op_player_idx])
            self.players_per_env.append(env_player)
        for player in self.player_pool.players:
            player.reset_envs()
        for player in self.op_player_pool.players:
            player.reset_envs()

    def _build_player_pool(self, params, player_num):
        """Create a pool of the configured type sized for `player_num` models.

        NOTE(review): the 'multi_thread' key returns the *Process* pool and
        'multi_process' returns the *Thread* pool — these look swapped.
        """
        if self.player_pool_type == 'multi_thread':
            return PFSPPlayerProcessPool(max_length=player_num, device=self.device)
        elif self.player_pool_type == 'multi_process':
            return PFSPPlayerThreadPool(max_length=player_num, device=self.device)
        elif self.player_pool_type == 'vectorized':
            vector_model_config = self.base_model_config
            vector_model_config['num_envs'] = self.num_actors * self.num_opponents
            vector_model_config['population_size'] = player_num
            return PFSPPlayerVectorizedPool(max_length=player_num, device=self.device,
                                            vector_model_config=vector_model_config, params=params)
        else:
            return PFSPPlayerPool(max_length=player_num, device=self.device)

    def _update_rating(self, info, env_indices):
        """Update Elo ratings for the players of each finished env.

        Two-player games use win/lose/draw flags; multi-player games use the
        per-env 'ranks' tensor with MultiElo's result_order."""
        for env_idx in env_indices:
            if self.num_opponents == 1:
                player = self.players_per_env[env_idx][0]
                op_player = self.players_per_env[env_idx][1]
                if info['win'][env_idx]:
                    player.rating, op_player.rating = self.elo.get_new_ratings([player.rating, op_player.rating])
                elif info['lose'][env_idx]:
                    op_player.rating, player.rating = self.elo.get_new_ratings([op_player.rating, player.rating])
                elif info['draw'][env_idx]:
                    # result_order=[1, 1] encodes a tie.
                    player.rating, op_player.rating = self.elo.get_new_ratings([player.rating, op_player.rating],
                                                                               result_order=[1, 1])
            else:
                ranks = info['ranks'][env_idx].cpu().numpy()
                players_sorted_by_rank = sorted(enumerate(self.players_per_env[env_idx]), key=lambda x: ranks[x[0]])
                sorted_ranks = sorted(ranks)
                now_ratings = [player.rating for idx, player in players_sorted_by_rank]
                new_ratings = self.elo.get_new_ratings(now_ratings, result_order=sorted_ranks)
                for idx, new_rating in enumerate(new_ratings):
                    players_sorted_by_rank[idx][1].rating = new_rating

    def run(self):
        """Play out evaluation games between the two pools, print reward
        stats, and plot the Elo curve when record_elo is set."""
        n_games = self.games_num
        render = self.render_env
        n_game_life = self.n_game_life
        is_determenistic = self.is_determenistic
        sum_rewards = 0
        sum_steps = 0
        sum_game_res = 0
        n_games = n_games * n_game_life
        games_played = 0
        has_masks = False
        has_masks_func = getattr(self.env, "has_action_mask", None) is not None
        if has_masks_func:
            has_masks = self.env.has_action_mask()
        print(f'games_num:{n_games}')
        need_init_rnn = self.is_rnn
        for _ in range(n_games):
            if games_played >= n_games:
                break
            obses = self.env_reset(self.env)
            batch_size = 1
            batch_size = self.get_batch_size(obses['obs'], batch_size)
            if need_init_rnn:
                self.init_rnn()
                need_init_rnn = False
            cr = torch.zeros(batch_size, dtype=torch.float32, device=self.device)
            steps = torch.zeros(batch_size, dtype=torch.float32, device=self.device)
            print_game_res = False
            done_indices = torch.tensor([], device=self.device, dtype=torch.long)
            for n in range(self.max_steps):
                obses = self.env_reset(self.env, done_indices)
                if has_masks:
                    masks = self.env.get_action_mask()
                    action = self.get_masked_action(obses, masks, is_determenistic)
                else:
                    action = self.get_action(obses['obs'], is_determenistic)
                    # NOTE(review): action_op is only defined on this branch
                    # but used unconditionally below (same pattern as the
                    # agent's play_steps) — masked path would raise NameError.
                    action_op = self.get_action(obses['obs_op'], is_determenistic, is_op=True)
                obses, r, done, info = self.env_step(self.env, obses, action, action_op)
                cr += r
                steps += 1
                if render:
                    self.env.render(mode='human')
                    time.sleep(self.render_sleep)
                all_done_indices = done.nonzero(as_tuple=False)
                done_indices = all_done_indices[::self.num_agents]
                done_count = len(done_indices)
                games_played += done_count
                if self.record_elo:
                    self._update_rating(info, all_done_indices.flatten())
                if done_count > 0:
                    if self.is_rnn:
                        for s in self.states:
                            s[:, all_done_indices, :] = s[:, all_done_indices, :] * 0.0
                    cur_rewards = cr[done_indices].sum().item()
                    cur_steps = steps[done_indices].sum().item()
                    cr = cr * (1.0 - done.float())
                    steps = steps * (1.0 - done.float())
                    sum_rewards += cur_rewards
                    sum_steps += cur_steps
                    game_res = 0.0
                    if isinstance(info, dict):
                        if 'battle_won' in info:
                            print_game_res = True
                            game_res = info.get('battle_won', 0.5)
                        if 'scores' in info:
                            print_game_res = True
                            game_res = info.get('scores', 0.5)
                    if self.print_stats:
                        if print_game_res:
                            print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count, 'w:',
                                  game_res)
                        else:
                            print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count)
                    sum_game_res += game_res
                    if batch_size // self.num_agents == 1 or games_played >= n_games:
                        break
                done_indices = done_indices[:, 0]
        if self.record_elo:
            self._plot_elo_curve()

    def _plot_elo_curve(self):
        """Plot rating vs. checkpoint age (days) for both pools and save to
        <load_path>/../elo.jpg."""
        x = np.array(self.policy_timestep)
        # NOTE(review): np.arange yields int arrays, so float ratings assigned
        # below are truncated — a float dtype was probably intended.
        y = np.arange(len(self.player_pool.players))
        x_op = np.array(self.policy_op_timestep)
        y_op = np.arange(len(self.op_player_pool.players))
        for player in self.player_pool.players:
            idx = player.player_idx
            y[idx] = player.rating
        for player in self.op_player_pool.players:
            idx = player.player_idx
            y_op[idx] = player.rating
        if self.params['load_path'] != self.params['op_load_path']:
            l1 = plt.plot(x, y, 'b--', label='policy')
            l2 = plt.plot(x_op, y_op, 'r--', label='policy_op')
            plt.plot(x, y, 'b^-', x_op, y_op, 'ro-')
        else:
            # Same pool on both sides: draw a single curve.
            l1 = plt.plot(x, y, 'b--', label='policy')
            plt.plot(x, y, 'b^-')
        plt.title('ELO Curve')
        plt.xlabel('timestep/days')
        plt.ylabel('ElO')
        plt.legend()
        plt.savefig(self.params['load_path'] + '/../elo.jpg')

    def get_action(self, obs, is_determenistic=False, is_op=False):
        """Pool-routed inference: each pool scatter-writes actions/mus/values
        for its assigned envs; returns clamped actions (mus if deterministic)."""
        if self.has_batch_dimension == False:
            obs = unsqueeze_obs(obs)
        obs = self._preproc_obs(obs)
        input_dict = {
            'is_train': False,
            'prev_actions': None,
            'obs': obs,
            'rnn_states': self.states
        }
        with torch.no_grad():
            data_len = self.num_actors * self.num_opponents if is_op else self.num_actors
            res_dict = {
                "actions": torch.zeros((data_len, self.actions_num), device=self.device),
                "values": torch.zeros((data_len, 1), device=self.device),
                "mus": torch.zeros((data_len, self.actions_num), device=self.device)
            }
            if is_op:
                self.op_player_pool.inference(input_dict, res_dict, obs)
            else:
                self.player_pool.inference(input_dict, res_dict, obs)
        mu = res_dict['mus']
        action = res_dict['actions']
        if is_determenistic:
            current_action = mu
        else:
            current_action = action
        current_action = torch.squeeze(current_action.detach())
        return torch.clamp(current_action, -1.0, 1.0)

    def _norm_policy_timestep(self):
        """Convert checkpoint mtimes into days elapsed since the oldest one."""
        self.policy_op_timestep.sort()
        self.policy_timestep.sort()
        # Index 0 is the reference point; it is zeroed after the loops.
        for idx in range(1, len(self.policy_op_timestep)):
            self.policy_op_timestep[idx] -= self.policy_op_timestep[0]
            self.policy_op_timestep[idx] /= 3600 * 24
        for idx in range(1, len(self.policy_timestep)):
            self.policy_timestep[idx] -= self.policy_timestep[0]
            self.policy_timestep[idx] /= 3600 * 24
        self.policy_timestep[0] = 0
        if len(self.policy_op_timestep):
            self.policy_op_timestep[0] = 0

    def env_reset(self, env, env_ids=None):
        """Reset envs and split stacked obs into ego/op halves."""
        obs = env.reset(env_ids)
        obs_dict = {}
        obs_dict['obs_op'] = obs[self.num_actors:]
        obs_dict['obs'] = obs[:self.num_actors]
        return obs_dict

    def env_step(self, env, obs_dict, ego_actions, op_actions):
        """One high-level step: run `_llc_steps` low-level steps, averaging
        rewards and OR-ing win/lose/draw/terminate flags (mirrors the agent's
        env_step, but extracts a scalar disc reward for logging)."""
        obs = obs_dict['obs']
        obs_op = obs_dict['obs_op']
        rewards = 0.0
        done_count = 0.0
        disc_rewards = 0.0
        terminate_count = 0.0
        win_count = 0.0
        lose_count = 0.0
        draw_count = 0.0
        for t in range(self._llc_steps):
            llc_ego_actions = self._compute_llc_action(obs, ego_actions)
            llc_op_actions = self._compute_llc_action(obs_op, op_actions)
            llc_actions = torch.cat((llc_ego_actions, llc_op_actions), dim=0)
            obs_all, curr_rewards, curr_dones, infos = env.step(llc_actions)
            rewards += curr_rewards
            done_count += curr_dones
            terminate_count += infos['terminate']
            win_count += infos['win']
            lose_count += infos['lose']
            draw_count += infos['draw']
            amp_obs = infos['amp_obs']
            curr_disc_reward = self._calc_disc_reward(amp_obs)
            # Keep only the first env's scalar disc reward here.
            curr_disc_reward = curr_disc_reward[0, 0].cpu().numpy()
            disc_rewards += curr_disc_reward
            obs = obs_all[:self.num_actors]
            obs_op = obs_all[self.num_actors:]
        rewards /= self._llc_steps
        disc_rewards /= self._llc_steps
        dones = torch.zeros_like(done_count)
        dones[done_count > 0] = 1.0
        terminate = torch.zeros_like(terminate_count)
        terminate[terminate_count > 0] = 1.0
        infos['terminate'] = terminate
        infos['disc_rewards'] = disc_rewards
        wins = torch.zeros_like(win_count)
        wins[win_count > 0] = 1.0
        infos['win'] = wins
        loses = torch.zeros_like(lose_count)
        loses[lose_count > 0] = 1.0
        infos['lose'] = loses
        draws = torch.zeros_like(draw_count)
        draws[draw_count > 0] = 1.0
        infos['draw'] = draws
        next_obs_dict = {}
        next_obs_dict['obs_op'] = obs_op
        next_obs_dict['obs'] = obs
        if self.value_size > 1:
            rewards = rewards[0]
        if self.is_tensor_obses:
            return self.obs_to_torch(next_obs_dict), rewards.cpu(), dones.cpu(), infos
        else:
            if np.isscalar(dones):
                rewards = np.expand_dims(np.asarray(rewards), 0)
                dones = np.expand_dims(np.asarray(dones), 0)
            return next_obs_dict, rewards, dones, infos

    def create_model(self):
        """Build a fresh policy network on the player device."""
        model = self.network.build(self.base_model_config)
        model.to(self.device)
        return model

    def load_model(self, fn):
        """Build a network and load weights (plus optional obs normalizer)
        from checkpoint `fn`."""
        model = self.create_model()
        checkpoint = load_checkpoint(fn, device=self.device)
        checkpoint = load_check(checkpoint, normalize_input=self.normalize_input,
                                normalize_value=self.normalize_value)
        model.load_state_dict(checkpoint['model'])
        if self.normalize_input and 'running_mean_std' in checkpoint:
            model.running_mean_std.load_state_dict(checkpoint['running_mean_std'])
        return model


================================================
FILE: timechamber/learning/pfsp_player_pool.py
================================================
import collections
import random
import torch
import torch.multiprocessing as mp
import dill
# import time
from rl_games.algos_torch import model_builder from concurrent.futures import ThreadPoolExecutor, as_completed, wait, ALL_COMPLETED def player_inference_thread(model, input_dict, res_dict, env_indices, processed_obs): if len(env_indices) == 0: return None input_dict['obs'] = processed_obs[env_indices] out_dict = model(input_dict) for key in res_dict: res_dict[key][env_indices] = out_dict[key] return out_dict def player_inference_process(pipe, queue, barrier): input_dict = { 'is_train': False, 'prev_actions': None, 'obs': None, 'rnn_states': None, } model = None barrier.wait() while True: msg = pipe.recv() task = msg['task'] if task == 'init': if model is not None: del model model = queue.get() model = dill.loads(model) barrier.wait() elif task == 'forward': obs, actions, values, env_indices = queue.get() input_dict['obs'] = obs[env_indices] out_dict = model(input_dict) actions[env_indices] = out_dict['actions'] values[env_indices] = out_dict['values'] barrier.wait() del obs, actions, values, env_indices elif task == 'terminate': break else: barrier.wait() class SinglePlayer: def __init__(self, player_idx, model, device, obs_batch_len=0, rating=None): self.model = model if model: self.model.eval() self.player_idx = player_idx self._games = torch.tensor(0, device=device, dtype=torch.float) self._wins = torch.tensor(0, device=device, dtype=torch.float) self._loses = torch.tensor(0, device=device, dtype=torch.float) self._draws = torch.tensor(0, device=device, dtype=torch.float) self._decay = 0.998 self._has_env = torch.zeros((obs_batch_len,), device=device, dtype=torch.bool) self.device = device self.env_indices = torch.tensor([], device=device, dtype=torch.long, requires_grad=False) if rating: self.rating = rating def __call__(self, input_dict): return self.model(input_dict) def reset_envs(self): self.env_indices = self._has_env.nonzero(as_tuple=True) def remove_envs(self, env_indices): self._has_env[env_indices] = False def add_envs(self, env_indices): 
self._has_env[env_indices] = True def clear_envs(self): self.env_indices = torch.tensor([], device=self.device, dtype=torch.long, requires_grad=False) def update_metric(self, wins, loses, draws): win_count = torch.sum(wins[self.env_indices]) lose_count = torch.sum(loses[self.env_indices]) draw_count = torch.sum(draws[self.env_indices]) for stats in (self._games, self._wins, self._loses, self._draws): stats *= self._decay self._games += win_count + lose_count + draw_count self._wins += win_count self._loses += lose_count self._draws += draw_count def clear_metric(self): self._games = torch.tensor(0, device=self.device, dtype=torch.float) self._wins = torch.tensor(0, device=self.device, dtype=torch.float) self._loses = torch.tensor(0, device=self.device, dtype=torch.float) self._draws = torch.tensor(0, device=self.device, dtype=torch.float) def win_rate(self): if self.model is None: return 0 elif self._games == 0: return 0.5 return (self._wins + 0.5 * self._draws) / self._games def games_num(self): return self._games class PFSPPlayerPool: def __init__(self, max_length, device): assert max_length > 0 self.players = [] self.max_length = max_length self.idx = 0 self.device = device self.weightings = { "variance": lambda x: x * (1 - x), "linear": lambda x: 1 - x, "squared": lambda x: (1 - x) ** 2, } def add_player(self, player): if len(self.players) < self.max_length: self.players.append(player) else: self.players[self.idx] = player self.idx += 1 self.idx %= self.max_length def sample_player(self, weight='linear'): weight_func = self.weightings[weight] player = \ random.choices(self.players, weights=[weight_func(player.win_rate()) for player in self.players])[0] return player def update_player_metric(self, infos): for player in self.players: player.update_metric(infos['win'], infos['lose'], infos['draw']) def clear_player_metric(self): for player in self.players: player.clear_metric() def inference(self, input_dict, res_dict, processed_obs): for i, player in 
enumerate(self.players): if len(player.env_indices[0]) == 0: continue input_dict['obs'] = processed_obs[player.env_indices] out_dict = player(input_dict) for key in res_dict: res_dict[key][player.env_indices] = out_dict[key] class PFSPPlayerVectorizedPool(PFSPPlayerPool): def __init__(self, max_length, device, vector_model_config, params): super(PFSPPlayerVectorizedPool, self).__init__(max_length, device) params['model']['name'] = 'vectorized_a2c' params['network']['name'] = 'vectorized_a2c' builder = model_builder.ModelBuilder() self.vectorized_network = builder.load(params) self.vectorized_model = self.vectorized_network.build(vector_model_config) self.vectorized_model.to(self.device) self.vectorized_model.eval() self.obs = torch.zeros( (self.max_length, vector_model_config["num_envs"], vector_model_config['input_shape'][0]), dtype=torch.float32, device=self.device) for idx in range(max_length): self.add_player(SinglePlayer(idx, None, self.device, vector_model_config["num_envs"])) def inference(self, input_dict, res_dict, processed_obs): for i, player in enumerate(self.players): self.obs[i][player.env_indices] = processed_obs[player.env_indices] input_dict['obs'] = self.obs out_dict = self.vectorized_model(input_dict) for i, player in enumerate(self.players): if len(player.env_indices) == 0: continue for key in res_dict: res_dict[key][player.env_indices] = out_dict[key][i][player.env_indices] def add_player(self, player): if player.model: self.vectorized_model.update(self.idx, player.model) super().add_player(player) class PFSPPlayerThreadPool(PFSPPlayerPool): def __init__(self, max_length, device): super().__init__(max_length, device) self.thread_pool = ThreadPoolExecutor(max_workers=self.max_length) def inference(self, input_dict, res_dict, processed_obs): self.thread_pool.map(player_inference_thread, [player.model for player in self.players], [input_dict for _ in range(len(self.players))], [res_dict for _ in range(len(self.players))], [player.env_indices for 
player in self.players], [processed_obs for _ in range(len(self.players))]) class PFSPPlayerProcessPool(PFSPPlayerPool): def __init__(self, max_length, device): super(PFSPPlayerProcessPool, self).__init__(max_length, device) self.inference_processes = [] self.queues = [] self.producer_pipes = [] self.consumer_pipes = [] self.barrier = mp.Barrier(self.max_length + 1) mp.set_start_method(method='spawn', force=True) self._init_inference_processes() def _init_inference_processes(self): for _ in range(self.max_length): queue = mp.Queue() self.queues.append(queue) pipe_read, pipe_write = mp.Pipe(duplex=False) self.producer_pipes.append(pipe_write) self.consumer_pipes.append(pipe_read) process = mp.Process(target=player_inference_process, args=(pipe_read, queue, self.barrier), daemon=True) self.inference_processes.append(process) process.start() self.barrier.wait() def add_player(self, player): with torch.no_grad(): model = dill.dumps(player.model) for i in range(self.max_length): if i == self.idx: self.producer_pipes[i].send({'task': 'init'}) self.queues[i].put(model) else: self.producer_pipes[i].send({'task': 'continue'}) self.barrier.wait() if len(self.players) < self.max_length: self.players.append(player) else: self.players[self.idx] = player self.idx += 1 self.idx %= self.max_length def inference(self, input_dict, res_dict, processed_obs): for i in range(self.max_length): if i < len(self.players) and len(self.players[i].env_indices): self.producer_pipes[i].send({'task': 'forward'}) self.queues[i].put( (processed_obs, res_dict['actions'], res_dict['values'], self.players[i].env_indices)) else: self.producer_pipes[i].send({'task': 'continue'}) def __del__(self): for pipe in self.producer_pipes: pipe.send({'task': 'terminate'}) for process in self.inference_processes: process.join() ================================================ FILE: timechamber/learning/ppo_sp_agent.py ================================================ # License: see [LICENSE, 
LICENSES/isaacgymenvs/LICENSE] import copy from datetime import datetime from gym import spaces import numpy as np import os import time from .pfsp_player_pool import PFSPPlayerPool, SinglePlayer, PFSPPlayerThreadPool, PFSPPlayerProcessPool, \ PFSPPlayerVectorizedPool from timechamber.utils.utils import load_checkpoint from rl_games.algos_torch import a2c_continuous from rl_games.common.a2c_common import swap_and_flatten01 from rl_games.algos_torch import torch_ext from rl_games.algos_torch import central_value import torch from torch import optim from tensorboardX import SummaryWriter import torch.distributed as dist class SPAgent(a2c_continuous.A2CAgent): def __init__(self, base_name, params): params['config']['device'] = params['device'] super().__init__(base_name, params) self.player_pool_type = params['player_pool_type'] self.base_model_config = { 'actions_num': self.actions_num, 'input_shape': self.obs_shape, 'num_seqs': self.num_agents, 'value_size': self.env_info.get('value_size', 1), 'normalize_value': self.normalize_value, 'normalize_input': self.normalize_input, } self.max_his_player_num = params['player_pool_length'] if params['op_load_path']: self.init_op_model = self.create_model() self.restore_op(params['op_load_path']) else: self.init_op_model = self.model self.players_dir = os.path.join(self.experiment_dir, 'policy_dir') os.makedirs(self.players_dir, exist_ok=True) self.update_win_rate = params['update_win_rate'] self.num_opponent_agents = params['num_agents'] - 1 self.player_pool = self._build_player_pool(params) self.games_to_check = params['games_to_check'] self.now_update_steps = 0 self.max_update_steps = params['max_update_steps'] self.update_op_num = 0 self.update_player_pool(self.init_op_model, player_idx=self.update_op_num) self.resample_op(torch.arange(end=self.num_actors, device=self.device, dtype=torch.long)) assert self.num_actors % self.max_his_player_num == 0 def _build_player_pool(self, params): if self.player_pool_type == 
'multi_thread': return PFSPPlayerProcessPool(max_length=self.max_his_player_num, device=self.device) elif self.player_pool_type == 'multi_process': return PFSPPlayerThreadPool(max_length=self.max_his_player_num, device=self.device) elif self.player_pool_type == 'vectorized': vector_model_config = self.base_model_config vector_model_config['num_envs'] = self.num_actors * self.num_opponent_agents vector_model_config['population_size'] = self.max_his_player_num return PFSPPlayerVectorizedPool(max_length=self.max_his_player_num, device=self.device, vector_model_config=vector_model_config, params=params) else: return PFSPPlayerPool(max_length=self.max_his_player_num, device=self.device) def play_steps(self): update_list = self.update_list step_time = 0.0 env_done_indices = torch.tensor([], device=self.device, dtype=torch.long) for n in range(self.horizon_length): self.obs = self.env_reset(env_done_indices) if self.use_action_masks: masks = self.vec_env.get_action_masks() res_dict = self.get_masked_action_values(self.obs, masks) else: res_dict_op = self.get_action_values(self.obs, is_op=True) res_dict = self.get_action_values(self.obs) self.experience_buffer.update_data('obses', n, self.obs['obs']) self.experience_buffer.update_data('dones', n, self.dones) for k in update_list: self.experience_buffer.update_data(k, n, res_dict[k]) if self.has_central_value: self.experience_buffer.update_data('states', n, self.obs['states']) if self.player_pool_type == 'multi_thread': self.player_pool.thread_pool.shutdown() step_time_start = time.time() self.obs, rewards, self.dones, infos = self.env_step( torch.cat((res_dict['actions'], res_dict_op['actions']), dim=0)) step_time_end = time.time() step_time += (step_time_end - step_time_start) shaped_rewards = self.rewards_shaper(rewards) if self.value_bootstrap and 'time_outs' in infos: shaped_rewards += self.gamma * res_dict['values'] * self.cast_obs(infos['time_outs']).unsqueeze( 1).float() self.experience_buffer.update_data('rewards', 
n, shaped_rewards) self.current_rewards += rewards self.current_lengths += 1 all_done_indices = self.dones.nonzero(as_tuple=False) env_done_indices = self.dones.view(self.num_actors, self.num_agents).all(dim=1).nonzero(as_tuple=False) # print(f"env done indices: {env_done_indices}") # print(f"self.dones {self.dones}") self.game_rewards.update(self.current_rewards[env_done_indices]) self.game_lengths.update(self.current_lengths[env_done_indices]) self.algo_observer.process_infos(infos, env_done_indices) not_dones = 1.0 - self.dones.float() self.current_rewards = self.current_rewards * not_dones.unsqueeze(1) self.current_lengths = self.current_lengths * not_dones self.player_pool.update_player_metric(infos=infos) self.resample_op(all_done_indices.flatten()) env_done_indices = env_done_indices[:, 0] last_values = self.get_values(self.obs) fdones = self.dones.float() mb_fdones = self.experience_buffer.tensor_dict['dones'].float() mb_values = self.experience_buffer.tensor_dict['values'] mb_rewards = self.experience_buffer.tensor_dict['rewards'] mb_advs = self.discount_values(fdones, last_values, mb_fdones, mb_values, mb_rewards) mb_returns = mb_advs + mb_values batch_dict = self.experience_buffer.get_transformed_list(swap_and_flatten01, self.tensor_list) batch_dict['returns'] = swap_and_flatten01(mb_returns) batch_dict['played_frames'] = self.batch_size batch_dict['step_time'] = step_time return batch_dict def env_step(self, actions): actions = self.preprocess_actions(actions) obs, rewards, dones, infos = self.vec_env.step(actions) obs['obs_op'] = obs['obs'][self.num_actors:] obs['obs'] = obs['obs'][:self.num_actors] if self.is_tensor_obses: if self.value_size == 1: rewards = rewards.unsqueeze(1) return self.obs_to_tensors(obs), rewards.to(self.ppo_device), dones.to(self.ppo_device), infos else: if self.value_size == 1: rewards = np.expand_dims(rewards, axis=1) return self.obs_to_tensors(obs), torch.from_numpy(rewards).to(self.ppo_device).float(), torch.from_numpy( 
dones).to(self.ppo_device), infos def env_reset(self, env_ids=None): obs = self.vec_env.reset(env_ids) obs = self.obs_to_tensors(obs) obs['obs_op'] = obs['obs'][self.num_actors:] obs['obs'] = obs['obs'][:self.num_actors] return obs def train(self): self.init_tensors() self.mean_rewards = self.last_mean_rewards = -100500 start_time = time.time() total_time = 0 rep_count = 0 # self.frame = 0 # loading from checkpoint self.obs = self.env_reset() if self.multi_gpu: torch.cuda.set_device(self.rank) print("====================broadcasting parameters") model_params = [self.model.state_dict()] dist.broadcast_object_list(model_params, 0) self.model.load_state_dict(model_params[0]) while True: epoch_num = self.update_epoch() step_time, play_time, update_time, sum_time, a_losses, c_losses, b_losses, entropies, kls, last_lr, lr_mul = self.train_epoch() # cleaning memory to optimize space self.dataset.update_values_dict(None) total_time += sum_time curr_frames = self.curr_frames * self.rank_size if self.multi_gpu else self.curr_frames self.frame += curr_frames should_exit = False if self.rank == 0: self.diagnostics.epoch(self, current_epoch=epoch_num) scaled_time = self.num_agents * sum_time scaled_play_time = self.num_agents * play_time frame = self.frame // self.num_agents if self.print_stats: step_time = max(step_time, 1e-6) fps_step = curr_frames / step_time fps_step_inference = curr_frames / scaled_play_time fps_total = curr_frames / scaled_time print( f'fps step: {fps_step:.0f} fps step and policy inference: {fps_step_inference:.0f} fps total: {fps_total:.0f} epoch: {epoch_num}/{self.max_epochs}') self.write_stats(total_time, epoch_num, step_time, play_time, update_time, a_losses, c_losses, entropies, kls, last_lr, lr_mul, frame, scaled_time, scaled_play_time, curr_frames) self.algo_observer.after_print_stats(frame, epoch_num, total_time) if self.game_rewards.current_size > 0: mean_rewards = self.game_rewards.get_mean() mean_lengths = self.game_lengths.get_mean() 
self.mean_rewards = mean_rewards[0] for i in range(self.value_size): rewards_name = 'rewards' if i == 0 else 'rewards{0}'.format(i) self.writer.add_scalar(rewards_name + '/step'.format(i), mean_rewards[i], frame) self.writer.add_scalar(rewards_name + '/iter'.format(i), mean_rewards[i], epoch_num) self.writer.add_scalar(rewards_name + '/time'.format(i), mean_rewards[i], total_time) self.writer.add_scalar('episode_lengths/step', mean_lengths, frame) self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num) self.writer.add_scalar('episode_lengths/time', mean_lengths, total_time) # removed equal signs (i.e. "rew=") from the checkpoint name since it messes with hydra CLI parsing checkpoint_name = self.config['name'] + '_ep_' + str(epoch_num) + '_rew_' + str(mean_rewards[0]) if self.save_freq > 0: if (epoch_num % self.save_freq == 0) and (mean_rewards <= self.last_mean_rewards): self.save(os.path.join(self.nn_dir, 'last_' + checkpoint_name)) if mean_rewards[0] > self.last_mean_rewards and epoch_num >= self.save_best_after: print('saving next best rewards: ', mean_rewards) self.last_mean_rewards = mean_rewards[0] self.save(os.path.join(self.nn_dir, self.config['name'])) if 'score_to_win' in self.config: if self.last_mean_rewards > self.config['score_to_win']: print('Network won!') self.save(os.path.join(self.nn_dir, checkpoint_name)) should_exit = True if epoch_num >= self.max_epochs: if self.game_rewards.current_size == 0: print('WARNING: Max epochs reached before any env terminated at least once') mean_rewards = -np.inf self.save(os.path.join(self.nn_dir, 'last_' + self.config['name'] + 'ep' + str(epoch_num) + 'rew' + str( mean_rewards))) print('MAX EPOCHS NUM!') should_exit = True self.update_metric() update_time = 0 if self.multi_gpu: should_exit_t = torch.tensor(should_exit, device=self.device).float() dist.broadcast(should_exit_t, 0) should_exit = should_exit_t.bool().item() if should_exit: return self.last_mean_rewards, epoch_num def 
update_metric(self): tot_win_rate = 0 tot_games_num = 0 self.now_update_steps += 1 # self_player process for player in self.player_pool.players: win_rate = player.win_rate() games = player.games_num() self.writer.add_scalar(f'rate/win_rate_player_{player.player_idx}', win_rate, self.epoch_num) tot_win_rate += win_rate * games tot_games_num += games win_rate = tot_win_rate / tot_games_num if tot_games_num > self.games_to_check: self.check_update_opponent(win_rate) self.writer.add_scalar('rate/win_rate', win_rate, self.epoch_num) def get_action_values(self, obs, is_op=False): processed_obs = self._preproc_obs(obs['obs_op'] if is_op else obs['obs']) if not is_op: self.model.eval() input_dict = { 'is_train': False, 'prev_actions': None, 'obs': processed_obs, 'rnn_states': self.rnn_states } with torch.no_grad(): if is_op: res_dict = { "actions": torch.zeros((self.num_actors * self.num_opponent_agents, self.actions_num), device=self.device), "values": torch.zeros((self.num_actors * self.num_opponent_agents, 1), device=self.device) } self.player_pool.inference(input_dict, res_dict, processed_obs) else: res_dict = self.model(input_dict) if self.has_central_value: states = obs['states'] input_dict = { 'is_train': False, 'states': states, } value = self.get_central_value(input_dict) res_dict['values'] = value return res_dict def resample_op(self, resample_indices): for op_idx in range(self.num_opponent_agents): for player in self.player_pool.players: player.remove_envs(resample_indices + op_idx * self.num_actors) for op_idx in range(self.num_opponent_agents): for env_idx in resample_indices: player = self.player_pool.sample_player() player.add_envs(env_idx + op_idx * self.num_actors) for player in self.player_pool.players: player.reset_envs() def resample_batch(self): env_indices = torch.arange(end=self.num_actors * self.num_opponent_agents, device=self.device, dtype=torch.long, requires_grad=False) step = self.num_actors // 32 for player in self.player_pool.players: 
player.clear_envs() for i in range(0, self.num_actors, step): player = self.player_pool.sample_player() player.add_envs(env_indices[i:i + step]) print("resample done") def restore_op(self, fn): checkpoint = load_checkpoint(fn, device=self.device) self.init_op_model.load_state_dict(checkpoint['model']) if self.normalize_input and 'running_mean_std' in checkpoint: self.init_op_model.running_mean_std.load_state_dict(checkpoint['running_mean_std']) def check_update_opponent(self, win_rate): if win_rate > self.update_win_rate or self.now_update_steps > self.max_update_steps: print(f'winrate:{win_rate},add opponent to player pool') self.update_op_num += 1 self.now_update_steps = 0 self.update_player_pool(self.model, player_idx=self.update_op_num) self.player_pool.clear_player_metric() self.resample_op(torch.arange(end=self.num_actors, device=self.device, dtype=torch.long)) self.save(os.path.join(self.players_dir, f'policy_{self.update_op_num}')) def create_model(self): model = self.network.build(self.base_model_config) model.to(self.device) return model def update_player_pool(self, model, player_idx): new_model = self.create_model() new_model.load_state_dict(copy.deepcopy(model.state_dict())) if hasattr(model, 'running_mean_std'): new_model.running_mean_std.load_state_dict(copy.deepcopy(model.running_mean_std.state_dict())) player = SinglePlayer(player_idx, new_model, self.device, self.num_actors * self.num_opponent_agents) self.player_pool.add_player(player) ================================================ FILE: timechamber/learning/ppo_sp_player.py ================================================ # License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE] import os import time import torch import numpy as np from rl_games.algos_torch import players import random from rl_games.algos_torch import torch_ext from rl_games.common.tr_helpers import unsqueeze_obs from rl_games.common.player import BasePlayer from .pfsp_player_pool import PFSPPlayerPool, PFSPPlayerVectorizedPool, 
PFSPPlayerThreadPool, PFSPPlayerProcessPool, \ SinglePlayer import matplotlib.pyplot as plt from multielo import MultiElo def rescale_actions(low, high, action): d = (high - low) / 2.0 m = (high + low) / 2.0 scaled_action = action * d + m return scaled_action class SPPlayer(BasePlayer): def __init__(self, params): params['config']['device_name'] = params['device'] super().__init__(params) print(f'params:{params}') self.network = self.config['network'] self.actions_num = self.action_space.shape[0] self.actions_low = torch.from_numpy(self.action_space.low.copy()).float().to(self.device) self.actions_high = torch.from_numpy(self.action_space.high.copy()).float().to(self.device) self.mask = [False] self.is_rnn = False self.normalize_input = self.config['normalize_input'] self.normalize_value = self.config.get('normalize_value', False) self.base_model_config = { 'actions_num': self.actions_num, 'input_shape': self.obs_shape, 'num_seqs': self.num_agents, 'value_size': self.env_info.get('value_size', 1), 'normalize_value': self.normalize_value, 'normalize_input': self.normalize_input, } self.policy_timestep = [] self.policy_op_timestep = [] self.params = params self.record_elo = self.player_config.get('record_elo', False) self.init_elo = self.player_config.get('init_elo', 400) self.num_actors = params['config']['num_actors'] self.player_pool_type = params['player_pool_type'] self.player_pool = None self.op_player_pool = None self.num_opponents = params['num_agents'] - 1 self.max_steps = 1000 self.update_op_num = 0 self.players_per_env = [] self.elo = MultiElo() def restore(self, load_dir): if os.path.isdir(load_dir): self.player_pool = self._build_player_pool(params=self.params, player_num=len(os.listdir(load_dir))) print('dir:', load_dir) sorted_players = [] for idx, policy_check_checkpoint in enumerate(os.listdir(load_dir)): model_timestep = os.path.getmtime(load_dir + '/' + str(policy_check_checkpoint)) self.policy_timestep.append(model_timestep) model = 
self.load_model(load_dir + '/' + str(policy_check_checkpoint)) new_player = SinglePlayer(player_idx=model_timestep, model=model, device=self.device, rating=self.init_elo, obs_batch_len=self.num_actors * self.num_opponents) sorted_players.append(new_player) sorted_players.sort(key=lambda player: player.player_idx) for idx, player in enumerate(sorted_players): player.player_idx = idx self.player_pool.add_player(player) self.policy_timestep.sort() else: self.player_pool = self._build_player_pool(params=self.params, player_num=1) self.policy_timestep.append(os.path.getmtime(load_dir)) model = self.load_model(load_dir) new_player = SinglePlayer(player_idx=0, model=model, device=self.device, rating=self.init_elo, obs_batch_len=self.num_actors * self.num_opponents) self.player_pool.add_player(new_player) self.restore_op(self.params['op_load_path']) self._norm_policy_timestep() self._alloc_env_indices() def restore_op(self, load_dir): if os.path.isdir(load_dir): self.op_player_pool = self._build_player_pool(params=self.params, player_num=len(os.listdir(load_dir))) sorted_players = [] for idx, policy_check_checkpoint in enumerate(os.listdir(load_dir)): model_timestep = os.path.getmtime(load_dir + '/' + str(policy_check_checkpoint)) self.policy_op_timestep.append(model_timestep) model = self.load_model(load_dir + '/' + str(policy_check_checkpoint)) new_player = SinglePlayer(player_idx=model_timestep, model=model, device=self.device, rating=self.init_elo, obs_batch_len=self.num_actors * self.num_opponents) sorted_players.append(new_player) sorted_players.sort(key=lambda player: player.player_idx) for idx, player in enumerate(sorted_players): player.player_idx = idx self.op_player_pool.add_player(player) self.policy_op_timestep.sort() else: self.op_player_pool = self._build_player_pool(params=self.params, player_num=1) self.policy_op_timestep.append(os.path.getmtime(load_dir)) model = self.load_model(load_dir) new_player = SinglePlayer(player_idx=0, model=model, 
device=self.device, rating=400, obs_batch_len=self.num_actors * self.num_opponents) self.op_player_pool.add_player(new_player) def _alloc_env_indices(self): for idx in range(self.num_actors): player_idx = random.randint(0, len(self.player_pool.players) - 1) self.player_pool.players[player_idx].add_envs(torch.tensor([idx], dtype=torch.long, device=self.device)) env_player = [self.player_pool.players[player_idx]] for op_idx in range(self.num_opponents): op_player_idx = random.randint(0, len(self.op_player_pool.players) - 1) self.op_player_pool.players[op_player_idx].add_envs( torch.tensor([idx + op_idx * self.num_actors], dtype=torch.long, device=self.device)) env_player.append(self.op_player_pool.players[op_player_idx]) self.players_per_env.append(env_player) for player in self.player_pool.players: player.reset_envs() for player in self.op_player_pool.players: player.reset_envs() def _build_player_pool(self, params, player_num): if self.player_pool_type == 'multi_thread': return PFSPPlayerProcessPool(max_length=player_num, device=self.device) elif self.player_pool_type == 'multi_process': return PFSPPlayerThreadPool(max_length=player_num, device=self.device) elif self.player_pool_type == 'vectorized': vector_model_config = self.base_model_config vector_model_config['num_envs'] = self.num_actors * self.num_opponents vector_model_config['population_size'] = player_num return PFSPPlayerVectorizedPool(max_length=player_num, device=self.device, vector_model_config=vector_model_config, params=params) else: return PFSPPlayerPool(max_length=player_num, device=self.device) def _update_rating(self, info, env_indices): for env_idx in env_indices: if self.num_opponents == 1: player = self.players_per_env[env_idx][0] op_player = self.players_per_env[env_idx][1] if info['win'][env_idx]: player.rating, op_player.rating = self.elo.get_new_ratings([player.rating, op_player.rating]) elif info['lose'][env_idx]: op_player.rating, player.rating = 
self.elo.get_new_ratings([op_player.rating, player.rating]) elif info['draw'][env_idx]: player.rating, op_player.rating = self.elo.get_new_ratings([player.rating, op_player.rating], result_order=[1, 1]) else: ranks = info['ranks'][env_idx].cpu().numpy() players_sorted_by_rank = sorted(enumerate(self.players_per_env[env_idx]), key=lambda x: ranks[x[0]]) sorted_ranks = sorted(ranks) now_ratings = [player.rating for idx, player in players_sorted_by_rank] new_ratings = self.elo.get_new_ratings(now_ratings, result_order=sorted_ranks) # print(now_ratings, new_ratings) # assert new_ratings[0] > 0 and new_ratings[1] > 0 and new_ratings[2] > 0 for idx, new_rating in enumerate(new_ratings): players_sorted_by_rank[idx][1].rating = new_rating def run(self): n_games = self.games_num render = self.render_env n_game_life = self.n_game_life is_determenistic = self.is_determenistic sum_rewards = 0 sum_steps = 0 sum_game_res = 0 n_games = n_games * n_game_life games_played = 0 has_masks = False has_masks_func = getattr(self.env, "has_action_mask", None) is not None if has_masks_func: has_masks = self.env.has_action_mask() print(f'games_num:{n_games}') need_init_rnn = self.is_rnn for _ in range(n_games): if games_played >= n_games: break obses = self.env_reset(self.env) batch_size = 1 batch_size = self.get_batch_size(obses['obs'], batch_size) if need_init_rnn: self.init_rnn() need_init_rnn = False cr = torch.zeros(batch_size, dtype=torch.float32, device=self.device) steps = torch.zeros(batch_size, dtype=torch.float32, device=self.device) print_game_res = False done_indices = torch.tensor([], device=self.device, dtype=torch.long) for n in range(self.max_steps): obses = self.env_reset(self.env, done_indices) if has_masks: masks = self.env.get_action_mask() action = self.get_masked_action( obses, masks, is_determenistic) else: action = self.get_action(obses['obs'], is_determenistic) action_op = self.get_action(obses['obs_op'], is_determenistic, is_op=True) obses, r, done, info = 
self.env_step(self.env, torch.cat((action, action_op), dim=0)) cr += r steps += 1 if render: self.env.render(mode='human') time.sleep(self.render_sleep) all_done_indices = done.nonzero(as_tuple=False) done_indices = all_done_indices[::self.num_agents] done_count = len(done_indices) games_played += done_count if self.record_elo: self._update_rating(info, all_done_indices.flatten()) if done_count > 0: if self.is_rnn: for s in self.states: s[:, all_done_indices, :] = s[:, all_done_indices, :] * 0.0 cur_rewards = cr[done_indices].sum().item() cur_steps = steps[done_indices].sum().item() cr = cr * (1.0 - done.float()) steps = steps * (1.0 - done.float()) sum_rewards += cur_rewards sum_steps += cur_steps game_res = 0.0 if isinstance(info, dict): if 'battle_won' in info: print_game_res = True game_res = info.get('battle_won', 0.5) if 'scores' in info: print_game_res = True game_res = info.get('scores', 0.5) if self.print_stats: if print_game_res: print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count, 'w:', game_res) else: print('reward:', cur_rewards / done_count, 'steps:', cur_steps / done_count) sum_game_res += game_res if batch_size // self.num_agents == 1 or games_played >= n_games: print(f"games_player: {games_played}") break done_indices = done_indices[:, 0] if self.record_elo: self._plot_elo_curve() def _plot_elo_curve(self): x = np.array(self.policy_timestep) y = np.arange(len(self.player_pool.players)) x_op = np.array(self.policy_op_timestep) y_op = np.arange(len(self.op_player_pool.players)) for player in self.player_pool.players: idx = player.player_idx # print(player.player_idx, player.rating) y[idx] = player.rating for player in self.op_player_pool.players: idx = player.player_idx # print(player.player_idx, player.rating) y_op[idx] = player.rating if self.params['load_path'] != self.params['op_load_path']: l1 = plt.plot(x, y, 'b--', label='policy') l2 = plt.plot(x_op, y_op, 'r--', label='policy_op') plt.plot(x, y, 'b^-', x_op, y_op, 
'ro-') else: l1 = plt.plot(x, y, 'b--', label='policy') plt.plot(x, y, 'b^-') plt.title('ELO Curve') plt.xlabel('timestep/days') plt.ylabel('ElO') plt.legend() parent_path = os.path.dirname(self.params['load_path']) plt.savefig(os.path.join(parent_path, 'elo.jpg')) def get_action(self, obs, is_determenistic=False, is_op=False): if self.has_batch_dimension == False: obs = unsqueeze_obs(obs) obs = self._preproc_obs(obs) input_dict = { 'is_train': False, 'prev_actions': None, 'obs': obs, 'rnn_states': self.states } with torch.no_grad(): data_len = self.num_actors * self.num_opponents if is_op else self.num_actors res_dict = { "actions": torch.zeros((data_len, self.actions_num), device=self.device), "values": torch.zeros((data_len, 1), device=self.device), "mus": torch.zeros((data_len, self.actions_num), device=self.device) } if is_op: self.op_player_pool.inference(input_dict, res_dict, obs) else: self.player_pool.inference(input_dict, res_dict, obs) mu = res_dict['mus'] action = res_dict['actions'] # self.states = res_dict['rnn_states'] if is_determenistic: current_action = mu else: current_action = action if self.has_batch_dimension == False: current_action = torch.squeeze(current_action.detach()) if self.clip_actions: return rescale_actions(self.actions_low, self.actions_high, torch.clamp(current_action, -1.0, 1.0)) else: return current_action def _norm_policy_timestep(self): self.policy_op_timestep.sort() self.policy_timestep.sort() for idx in range(1, len(self.policy_op_timestep)): self.policy_op_timestep[idx] -= self.policy_op_timestep[0] self.policy_op_timestep[idx] /= 3600 * 24 for idx in range(1, len(self.policy_timestep)): self.policy_timestep[idx] -= self.policy_timestep[0] self.policy_timestep[idx] /= 3600 * 24 self.policy_timestep[0] = 0 if len(self.policy_op_timestep): self.policy_op_timestep[0] = 0 def env_reset(self, env, done_indices=None): obs = env.reset(done_indices) obs_dict = {} obs_dict['obs_op'] = obs[self.num_actors:] obs_dict['obs'] = 
obs[:self.num_actors] return obs_dict def env_step(self, env, actions): obs, rewards, dones, infos = env.step(actions) if hasattr(obs, 'dtype') and obs.dtype == np.float64: obs = np.float32(obs) obs_dict = {} obs_dict['obs_op'] = obs[self.num_actors:] obs_dict['obs'] = obs[:self.num_actors] if self.value_size > 1: rewards = rewards[0] if self.is_tensor_obses: return self.obs_to_torch(obs_dict), rewards.cpu(), dones.cpu(), infos else: if np.isscalar(dones): rewards = np.expand_dims(np.asarray(rewards), 0) dones = np.expand_dims(np.asarray(dones), 0) return obs_dict, rewards, dones, infos def create_model(self): model = self.network.build(self.base_model_config) model.to(self.device) return model def load_model(self, fn): model = self.create_model() checkpoint = torch_ext.safe_filesystem_op(torch.load, fn, map_location=self.device) model.load_state_dict(checkpoint['model']) if self.normalize_input and 'running_mean_std' in checkpoint: model.running_mean_std.load_state_dict(checkpoint['running_mean_std']) return model ================================================ FILE: timechamber/learning/replay_buffer.py ================================================ # License: see [LICENSE, LICENSES/isaacgymenvs/LICENSE] import torch class ReplayBuffer(): def __init__(self, buffer_size, device): self._head = 0 self._total_count = 0 self._buffer_size = buffer_size self._device = device self._data_buf = None self._sample_idx = torch.randperm(buffer_size) self._sample_head = 0 return def reset(self): self._head = 0 self._total_count = 0 self._reset_sample_idx() return def get_buffer_size(self): return self._buffer_size def get_total_count(self): return self._total_count def store(self, data_dict): if (self._data_buf is None): self._init_data_buf(data_dict) n = next(iter(data_dict.values())).shape[0] buffer_size = self.get_buffer_size() assert (n < buffer_size) for key, curr_buf in self._data_buf.items(): curr_n = data_dict[key].shape[0] assert (n == curr_n) store_n = min(curr_n, 
class ReplayBuffer():
    """Fixed-capacity ring buffer of keyed tensor batches with pre-shuffled,
    cursor-based sampling."""

    def __init__(self, buffer_size, device):
        self._head = 0                 # next write position in the ring
        self._total_count = 0          # total samples ever stored
        self._buffer_size = buffer_size
        self._device = device
        self._data_buf = None          # allocated lazily on first store()
        self._sample_idx = torch.randperm(buffer_size)  # shuffled read order
        self._sample_head = 0          # cursor into the shuffled order
        return

    def reset(self):
        """Empty the buffer and reshuffle the sampling order."""
        self._head = 0
        self._total_count = 0
        self._reset_sample_idx()
        return

    def get_buffer_size(self):
        return self._buffer_size

    def get_total_count(self):
        return self._total_count

    def store(self, data_dict):
        """Append one batch of equally-sized tensors, wrapping at capacity."""
        if self._data_buf is None:
            self._init_data_buf(data_dict)

        batch = next(iter(data_dict.values())).shape[0]
        capacity = self.get_buffer_size()
        assert batch < capacity

        for key, dst in self._data_buf.items():
            src = data_dict[key]
            assert batch == src.shape[0]
            # Contiguous part before the wrap point, then the wrapped remainder.
            first = min(src.shape[0], capacity - self._head)
            dst[self._head:(self._head + first)] = src[:first]
            wrapped = batch - first
            if wrapped > 0:
                dst[0:wrapped] = src[first:]

        self._head = (self._head + batch) % capacity
        self._total_count += batch
        return

    def sample(self, n):
        """Draw `n` samples following the pre-shuffled index order."""
        capacity = self.get_buffer_size()
        positions = torch.arange(self._sample_head, self._sample_head + n) % capacity
        rand_idx = self._sample_idx[positions]
        if self.get_total_count() < capacity:
            # Buffer not yet full: fold indices into the written region.
            rand_idx = rand_idx % self._head

        samples = {key: buf[rand_idx] for key, buf in self._data_buf.items()}

        self._sample_head += n
        if self._sample_head >= capacity:
            self._reset_sample_idx()
        return samples

    def _reset_sample_idx(self):
        """Reshuffle the sampling order in place and rewind the cursor."""
        self._sample_idx[:] = torch.randperm(self.get_buffer_size())
        self._sample_head = 0
        return

    def _init_data_buf(self, data_dict):
        """Allocate zeroed per-key storage shaped (capacity, *sample_shape)."""
        capacity = self.get_buffer_size()
        self._data_buf = {
            key: torch.zeros((capacity,) + value.shape[1:], device=self._device)
            for key, value in data_dict.items()
        }
        return
self.axis = [1, 3, 4] if len(self.insize) == 2: self.axis = [1, 3] if len(self.insize) == 1: self.axis = [1] in_size = self.insize[1] else: self.axis = [1] in_size = insize # print(in_size) self.register_buffer("running_mean", torch.zeros((population_size, *in_size), dtype=torch.float32)) self.register_buffer("running_var", torch.ones((population_size, *in_size), dtype=torch.float32)) self.register_buffer("count", torch.ones((population_size, 1), dtype=torch.float32)) def _update_mean_var_count_from_moments(self, mean, var, count, batch_mean, batch_var, batch_count): delta = batch_mean - mean tot_count = count + batch_count new_mean = mean + delta * batch_count / tot_count m_a = var * count m_b = batch_var * batch_count M2 = m_a + m_b + delta ** 2 * count * batch_count / tot_count new_var = M2 / tot_count new_count = tot_count return new_mean, new_var, new_count def forward(self, input, unnorm=False, mask=None): if self.training: if mask is not None: mean, var = torch_ext.get_mean_std_with_masks(input, mask) else: mean = input.mean(self.axis) # along channel axis var = input.var(self.axis) self.running_mean, self.running_var, self.count = self._update_mean_var_count_from_moments( self.running_mean, self.running_var, self.count, mean, var, input.size()[1]) # change shape if self.per_channel: if len(self.insize) == 3: current_mean = self.running_mean.view([self.population_size, 1, self.insize[0], 1, 1]).expand_as(input) current_var = self.running_var.view([self.population_size, 1, self.insize[0], 1, 1]).expand_as(input) if len(self.insize) == 2: current_mean = self.running_mean.view([self.population_size, 1, self.insize[0], 1]).expand_as(input) current_var = self.running_var.view([self.population_size, 1, self.insize[0], 1]).expand_as(input) if len(self.insize) == 1: current_mean = self.running_mean.view([self.population_size, 1, self.insize[0]]).expand_as(input) current_var = self.running_var.view([self.population_size, 1, self.insize[0]]).expand_as(input) else: 
current_mean = self.running_mean current_var = self.running_var # get output if unnorm: y = torch.clamp(input, min=-5.0, max=5.0) y = torch.sqrt(torch.unsqueeze(current_var.float(), 1) + self.epsilon) * y + torch.unsqueeze( current_mean.float(), 1) else: if self.norm_only: y = input / torch.sqrt(current_var.float() + self.epsilon) else: y = (input - torch.unsqueeze(current_mean.float(), 1)) / torch.sqrt( torch.unsqueeze(current_var.float(), 1) + self.epsilon) y = torch.clamp(y, min=-5.0, max=5.0) return y class ModelVectorizedA2C(ModelA2CContinuousLogStd): def __init__(self, network): super().__init__(network) return def build(self, config): net = self.network_builder.build('vectorized_a2c', **config) for name, _ in net.named_parameters(): print(name) obs_shape = config['input_shape'] population_size = config['population_size'] normalize_value = config.get('normalize_value', False) normalize_input = config.get('normalize_input', False) value_size = config.get('value_size', 1) return self.Network(net, population_size, obs_shape=obs_shape, normalize_value=normalize_value, normalize_input=normalize_input, value_size=value_size, ) class Network(ModelA2CContinuousLogStd.Network): def __init__(self, a2c_network, population_size, obs_shape, normalize_value, normalize_input, value_size): self.population_size = population_size super().__init__(a2c_network, obs_shape=obs_shape, normalize_value=normalize_value, normalize_input=normalize_input, value_size=value_size) if normalize_value: self.value_mean_std = VectorizedRunningMeanStd((self.value_size,), self.population_size) if normalize_input: if isinstance(obs_shape, dict): self.running_mean_std = RunningMeanStdObs(obs_shape) else: self.running_mean_std = VectorizedRunningMeanStd(obs_shape, self.population_size) def update(self, population_idx, network): for key in self.state_dict(): param1 = self.state_dict()[key] param2 = network.state_dict()[key] if len(param1.shape) == len(param2.shape): self.state_dict()[key] = param2 
class VectorizedLinearLayer(torch.nn.Module):
    """Vectorized version of torch.nn.Linear.

    Holds one independent linear layer per population member:
    weight is (population, in_features, out_features), bias is
    (population, 1, out_features); input x is (population, batch, in_features)
    and the output is x @ weight + bias, shaped (population, batch, out_features).
    """

    def __init__(
        self,
        population_size: int,
        in_features: int,
        out_features: int,
        use_layer_norm: bool = False,
    ):
        super().__init__()
        self._population_size = population_size
        self._in_features = in_features
        self._out_features = out_features

        self.weight = torch.nn.Parameter(
            torch.empty(self._population_size, self._in_features, self._out_features),
            requires_grad=True,
        )
        self.bias = torch.nn.Parameter(
            torch.empty(self._population_size, 1, self._out_features),
            requires_grad=True,
        )

        # Initialize each member's weight matrix independently, mirroring
        # torch.nn.Linear's kaiming-uniform scheme.
        for member_id in range(population_size):
            torch.nn.init.kaiming_uniform_(self.weight[member_id], a=math.sqrt(5))
        fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight[0])
        bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
        torch.nn.init.uniform_(self.bias, -bound, bound)

        # BUG FIX: LayerNorm's second positional parameter is `eps`, not a
        # shape — the original passed population_size there, which silently
        # set a huge epsilon and broke the normalization. Normalize over the
        # trailing out_features dimension only.
        self._layer_norm = (
            torch.nn.LayerNorm(self._out_features) if use_layer_norm else None
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply each member's affine map to its slice of the batch."""
        assert x.shape[0] == self._population_size
        out = x.matmul(self.weight) + self.bias
        if self._layer_norm is not None:
            return self._layer_norm(out)
        return out
class VectorizedA2CBuilder(network_builder.A2CBuilder):
    """A2C network builder whose MLP trunk and output heads are vectorized
    over a population of policies."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        return

    class Network(network_builder.A2CBuilder.Network):
        def __init__(self, params, **kwargs):
            # population_size must exist before the parent constructor runs,
            # because the overridden _build_mlp below reads it.
            self.population_size = kwargs.get('population_size')
            super().__init__(params, **kwargs)
            # Swap the parent's scalar heads for population-vectorized ones.
            self.value = VectorizedLinearLayer(population_size=self.population_size,
                                               in_features=self.units[-1],
                                               out_features=self.value_size)
            actions_num = kwargs.get('actions_num')
            self.mu = VectorizedLinearLayer(self.population_size, self.units[-1], actions_num)
            if self.fixed_sigma:
                # One learned log-std vector per population member.
                self.sigma = nn.Parameter(
                    torch.zeros((self.population_size, 1, actions_num),
                                requires_grad=True, dtype=torch.float32),
                    requires_grad=True)
            else:
                self.sigma = VectorizedLinearLayer(self.population_size, self.units[-1], actions_num)

        def _build_vectorized_mlp(self, input_size, units, activation, norm_func_name=None):
            """Build a stack of VectorizedLinearLayer + activation pairs."""
            print(f'build vectorized mlp:{self.population_size}x{input_size}')
            modules = []
            width = input_size
            for hidden in units:
                modules.append(VectorizedLinearLayer(self.population_size, width, hidden,
                                                     norm_func_name == 'layer_norm'))
                modules.append(self.activations_factory.create(activation))
                width = hidden
            return nn.Sequential(*modules)

        def _build_mlp(self, input_size, units, activation, dense_func,
                       norm_only_first_layer=False, norm_func_name=None, d2rl=False):
            # Route the parent's MLP construction to the vectorized variant;
            # dense_func / d2rl options are intentionally ignored here.
            return self._build_vectorized_mlp(input_size, units, activation,
                                              norm_func_name=norm_func_name)

        def forward(self, obs_dict):
            # implement continues situation
            observations = obs_dict['obs']
            rnn_states = obs_dict.get('rnn_states', None)
            trunk_out = self.actor_mlp(observations)
            value = self.value_act(self.value(trunk_out))
            mu = self.mu_act(self.mu(trunk_out))
            if self.fixed_sigma:
                sigma = self.sigma_act(self.sigma)
            else:
                sigma = self.sigma_act(self.sigma(trunk_out))
            # `mu * 0 + sigma` broadcasts a fixed sigma to mu's shape.
            return mu, mu * 0 + sigma, value, rnn_states

        def load(self, params):
            super().load(params)

    def build(self, name, **kwargs):
        return VectorizedA2CBuilder.Network(self.params, **kwargs)
large to display: 19.5 MB] ================================================ FILE: timechamber/tasks/__init__.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from .ma_ant_sumo import MA_Ant_Sumo
from .ma_ant_battle import MA_Ant_Battle
from .ma_humanoid_strike import HumanoidStrike

# Mappings from strings to environments
isaacgym_task_map = {
    "MA_Ant_Sumo": MA_Ant_Sumo,
    "MA_Ant_Battle": MA_Ant_Battle,
    "MA_Humanoid_Strike": HumanoidStrike
}


# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/base_task.py
# ================================================
# Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import sys
import os
import operator
from copy import deepcopy
import random

from isaacgym import gymapi
from isaacgym.gymutil import get_property_setter_map, get_property_getter_map, get_default_setter_args, apply_random_samples, check_buckets, generate_random_samples

import numpy as np
import torch


# Base class for RL tasks
class BaseTask():
    def __init__(self, cfg, enable_camera_sensors=False):
        """Set up Isaac Gym, allocate per-env buffers, create the sim, and
        (unless headless) create a viewer with keyboard shortcuts.

        cfg: task config dict; reads cfg["env"] sizes, device, headless flags.
        enable_camera_sensors: keep a graphics device even when headless.
        """
        self.gym = gymapi.acquire_gym()

        self.device_type = cfg.get("device_type", "cuda")
        self.device_id = cfg.get("device_id", 0)

        self.device = "cpu"
        if self.device_type == "cuda" or self.device_type == "GPU":
            self.device = "cuda" + ":" + str(self.device_id)

        self.headless = cfg["headless"]
        self.num_agents = cfg["env"].get("numAgents", 1)  # used for multi-agent environments

        # double check!
        # Headless without camera sensors needs no graphics device at all.
        self.graphics_device_id = self.device_id
        if enable_camera_sensors == False and self.headless == True:
            self.graphics_device_id = -1

        self.num_envs = cfg["env"]["numEnvs"]
        self.num_obs = cfg["env"]["numObservations"]
        self.num_states = cfg["env"].get("numStates", 0)
        self.num_actions = cfg["env"]["numActions"]

        self.control_freq_inv = cfg["env"].get("controlFrequencyInv", 1)

        # optimization flags for pytorch JIT
        torch._C._jit_set_profiling_mode(False)
        torch._C._jit_set_profiling_executor(False)

        # allocate buffers
        self.obs_buf = torch.zeros(
            (self.num_envs, self.num_obs), device=self.device, dtype=torch.float)
        self.states_buf = torch.zeros(
            (self.num_envs, self.num_states), device=self.device, dtype=torch.float)
        self.rew_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.float)
        self.reset_buf = torch.ones(
            self.num_envs, device=self.device, dtype=torch.long)
        self.progress_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.randomize_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.extras = {}

        # Domain-randomization bookkeeping.
        self.original_props = {}
        self.dr_randomizations = {}
        self.first_randomization = True
        self.actor_params_generator = None
        self.extern_actor_params = {}
        for env_id in range(self.num_envs):
            self.extern_actor_params[env_id] = None
        self.last_step = -1
        self.last_rand_step = -1

        # create envs, sim and viewer
        # NOTE(review): create_sim() is invoked with no arguments here while
        # the base definition below takes four — subclasses are expected to
        # override it with a zero-argument version; confirm against subclasses.
        self.create_sim()
        self.gym.prepare_sim(self.sim)

        # todo: read from config
        self.enable_viewer_sync = True
        self.viewer = None

        # if running with a viewer, set up keyboard shortcuts and camera
        if self.headless == False:
            # subscribe to keyboard shortcuts
            self.viewer = self.gym.create_viewer(
                self.sim, gymapi.CameraProperties())
            self.gym.subscribe_viewer_keyboard_event(
                self.viewer, gymapi.KEY_ESCAPE, "QUIT")
            self.gym.subscribe_viewer_keyboard_event(
                self.viewer, gymapi.KEY_V, "toggle_viewer_sync")

            # set the camera position based on up axis
            sim_params = self.gym.get_sim_params(self.sim)
            if sim_params.up_axis == gymapi.UP_AXIS_Z:
                cam_pos = gymapi.Vec3(20.0, 25.0, 3.0)
                cam_target = gymapi.Vec3(10.0, 15.0, 0.0)
            else:
                cam_pos = gymapi.Vec3(20.0, 3.0, 25.0)
                cam_target = gymapi.Vec3(10.0, 0.0, 15.0)

            self.gym.viewer_camera_look_at(
                self.viewer, None, cam_pos, cam_target)

    # set gravity based on up axis and return axis index
    def set_sim_params_up_axis(self, sim_params, axis):
        """Configure gravity for the given up axis ('z' or default y) and
        return the up-axis index (2 for z, 1 otherwise)."""
        if axis == 'z':
            sim_params.up_axis = gymapi.UP_AXIS_Z
            sim_params.gravity.x = 0
            sim_params.gravity.y = 0
            sim_params.gravity.z = -9.81
            return 2
        return 1

    def create_sim(self, compute_device, graphics_device, physics_engine, sim_params):
        """Create and return the Isaac Gym sim; exits the process on failure."""
        sim = self.gym.create_sim(compute_device, graphics_device, physics_engine, sim_params)
        if sim is None:
            print("*** Failed to create sim")
            quit()
        return sim

    def step(self, actions):
        """One environment step: pre-physics, physics substeps, post-physics,
        applying any configured action/observation noise."""
        if self.dr_randomizations.get('actions', None):
            actions = self.dr_randomizations['actions']['noise_lambda'](actions)

        # apply actions
        self.pre_physics_step(actions)

        # step physics and render each frame
        self._physics_step()

        # to fix!
        if self.device == 'cpu':
            self.gym.fetch_results(self.sim, True)

        # compute observations, rewards, resets, ...
        self.post_physics_step()

        if self.dr_randomizations.get('observations', None):
            self.obs_buf = self.dr_randomizations['observations']['noise_lambda'](self.obs_buf)

    def get_states(self):
        """Return the privileged per-env state buffer."""
        return self.states_buf

    def render(self, sync_frame_time=False):
        """Service viewer events and draw a frame when a viewer exists.

        NOTE(review): sync_frame_time is currently unused in the body.
        """
        if self.viewer:
            # check for window closed
            if self.gym.query_viewer_has_closed(self.viewer):
                sys.exit()

            # check for keyboard events
            for evt in self.gym.query_viewer_action_events(self.viewer):
                if evt.action == "QUIT" and evt.value > 0:
                    sys.exit()
                elif evt.action == "toggle_viewer_sync" and evt.value > 0:
                    self.enable_viewer_sync = not self.enable_viewer_sync

            # fetch results
            if self.device != 'cpu':
                self.gym.fetch_results(self.sim, True)

            # step graphics
            if self.enable_viewer_sync:
                self.gym.step_graphics(self.sim)
                self.gym.draw_viewer(self.viewer, self.sim, True)
            else:
                self.gym.poll_viewer_events(self.viewer)

    def get_actor_params_info(self, dr_params, env):
        """Returns a flat array of actor params, their names and ranges."""
        if "actor_params" not in dr_params:
            return None
        params = []
        names = []
        lows = []
        highs = []
        param_getters_map = get_property_getter_map(self.gym)
        for actor, actor_properties in dr_params["actor_params"].items():
            handle = self.gym.find_actor_handle(env, actor)
            for prop_name, prop_attrs in actor_properties.items():
                if prop_name == 'color':
                    continue  # this is set randomly
                props = param_getters_map[prop_name](env, handle)
                if not isinstance(props, list):
                    props = [props]
                for prop_idx, prop in enumerate(props):
                    for attr, attr_randomization_params in prop_attrs.items():
                        name = prop_name+'_'+str(prop_idx)+'_'+attr
                        lo_hi = attr_randomization_params['range']
                        distr = attr_randomization_params['distribution']
                        # Non-uniform distributions have no finite range.
                        if 'uniform' not in distr:
                            lo_hi = (-1.0*float('Inf'), float('Inf'))
                        if isinstance(prop, np.ndarray):
                            for attr_idx in range(prop[attr].shape[0]):
                                params.append(prop[attr][attr_idx])
                                names.append(name+'_'+str(attr_idx))
                                lows.append(lo_hi[0])
                                highs.append(lo_hi[1])
                        else:
                            params.append(getattr(prop, attr))
                            names.append(name)
                            lows.append(lo_hi[0])
                            highs.append(lo_hi[1])
        return params, names, lows, highs
    # Apply randomizations only on resets, due to current PhysX limitations
    def apply_randomizations(self, dr_params):
        """Apply domain randomization described by `dr_params` to noise
        lambdas, sim params, and per-actor physical properties.

        On the first call everything is randomized; afterwards non-environment
        params follow `frequency` and envs are randomized when both their
        randomize counter exceeds the frequency and they are being reset.
        """
        # If we don't have a randomization frequency, randomize every step
        rand_freq = dr_params.get("frequency", 1)

        # First, determine what to randomize:
        #   - non-environment parameters when > frequency steps have passed since the last non-environment
        #   - physical environments in the reset buffer, which have exceeded the randomization frequency threshold
        #   - on the first call, randomize everything
        self.last_step = self.gym.get_frame_count(self.sim)
        if self.first_randomization:
            do_nonenv_randomize = True
            env_ids = list(range(self.num_envs))
        else:
            do_nonenv_randomize = (self.last_step - self.last_rand_step) >= rand_freq
            rand_envs = torch.where(self.randomize_buf >= rand_freq,
                                    torch.ones_like(self.randomize_buf),
                                    torch.zeros_like(self.randomize_buf))
            rand_envs = torch.logical_and(rand_envs, self.reset_buf)
            env_ids = torch.nonzero(rand_envs, as_tuple=False).squeeze(-1).tolist()
            self.randomize_buf[rand_envs] = 0

        if do_nonenv_randomize:
            self.last_rand_step = self.last_step

        param_setters_map = get_property_setter_map(self.gym)
        param_setter_defaults_map = get_default_setter_args(self.gym)
        param_getters_map = get_property_getter_map(self.gym)

        # On first iteration, check the number of buckets
        if self.first_randomization:
            check_buckets(self.gym, self.envs, dr_params)

        # Build (or rebuild) the observation/action noise lambdas.
        for nonphysical_param in ["observations", "actions"]:
            if nonphysical_param in dr_params and do_nonenv_randomize:
                dist = dr_params[nonphysical_param]["distribution"]
                op_type = dr_params[nonphysical_param]["operation"]
                sched_type = dr_params[nonphysical_param]["schedule"] if "schedule" in dr_params[nonphysical_param] else None
                sched_step = dr_params[nonphysical_param]["schedule_steps"] if "schedule" in dr_params[nonphysical_param] else None
                op = operator.add if op_type == 'additive' else operator.mul

                # Scale the noise magnitude in over time per the schedule.
                if sched_type == 'linear':
                    sched_scaling = 1.0 / sched_step * \
                        min(self.last_step, sched_step)
                elif sched_type == 'constant':
                    sched_scaling = 0 if self.last_step < sched_step else 1
                else:
                    sched_scaling = 1

                if dist == 'gaussian':
                    mu, var = dr_params[nonphysical_param]["range"]
                    mu_corr, var_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])
                    if op_type == 'additive':
                        mu *= sched_scaling
                        var *= sched_scaling
                        mu_corr *= sched_scaling
                        var_corr *= sched_scaling
                    elif op_type == 'scaling':
                        var = var * sched_scaling  # scale up var over time
                        mu = mu * sched_scaling + 1.0 * \
                            (1.0 - sched_scaling)  # linearly interpolate
                        var_corr = var_corr * sched_scaling  # scale up var over time
                        mu_corr = mu_corr * sched_scaling + 1.0 * \
                            (1.0 - sched_scaling)  # linearly interpolate

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        # 'corr' is drawn once and reused so this noise term is
                        # correlated across calls until randomization recurs.
                        params = self.dr_randomizations[param_name]
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * params['var_corr'] + params['mu_corr']
                        return op(
                            tensor, corr + torch.randn_like(tensor) * params['var'] + params['mu'])

                    self.dr_randomizations[nonphysical_param] = {'mu': mu, 'var': var, 'mu_corr': mu_corr,
                                                                 'var_corr': var_corr, 'noise_lambda': noise_lambda}

                elif dist == 'uniform':
                    lo, hi = dr_params[nonphysical_param]["range"]
                    lo_corr, hi_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])
                    if op_type == 'additive':
                        lo *= sched_scaling
                        hi *= sched_scaling
                        lo_corr *= sched_scaling
                        hi_corr *= sched_scaling
                    elif op_type == 'scaling':
                        lo = lo * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi = hi * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        lo_corr = lo_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi_corr = hi_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        # NOTE(review): the correlated term is drawn with randn_like
                        # even for the 'uniform' distribution — confirm intent.
                        params = self.dr_randomizations[param_name]
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * (params['hi_corr'] - params['lo_corr']) + params['lo_corr']
                        return op(tensor, corr + torch.rand_like(tensor) * (params['hi'] - params['lo']) + params['lo'])

                    self.dr_randomizations[nonphysical_param] = {'lo': lo, 'hi': hi, 'lo_corr': lo_corr,
                                                                 'hi_corr': hi_corr, 'noise_lambda': noise_lambda}

        if "sim_params" in dr_params and do_nonenv_randomize:
            prop_attrs = dr_params["sim_params"]
            prop = self.gym.get_sim_params(self.sim)
            if self.first_randomization:
                self.original_props["sim_params"] = {
                    attr: getattr(prop, attr) for attr in dir(prop)}
            for attr, attr_randomization_params in prop_attrs.items():
                apply_random_samples(
                    prop, self.original_props["sim_params"], attr, attr_randomization_params, self.last_step)
            self.gym.set_sim_params(self.sim, prop)

        # If self.actor_params_generator is initialized: use it to
        # sample actor simulation params. This gives users the
        # freedom to generate samples from arbitrary distributions,
        # e.g. use full-covariance distributions instead of the DR's
        # default of treating each simulation parameter independently.
        extern_offsets = {}
        if self.actor_params_generator is not None:
            for env_id in env_ids:
                self.extern_actor_params[env_id] = \
                    self.actor_params_generator.sample()
                extern_offsets[env_id] = 0

        # Randomize per-actor physical properties for the selected envs.
        for actor, actor_properties in dr_params["actor_params"].items():
            for env_id in env_ids:
                env = self.envs[env_id]
                handle = self.gym.find_actor_handle(env, actor)
                extern_sample = self.extern_actor_params[env_id]

                for prop_name, prop_attrs in actor_properties.items():
                    if prop_name == 'color':
                        # Colors are purely visual: draw uniform RGB per body.
                        num_bodies = self.gym.get_actor_rigid_body_count(
                            env, handle)
                        for n in range(num_bodies):
                            self.gym.set_rigid_body_color(env, handle, n, gymapi.MESH_VISUAL,
                                                          gymapi.Vec3(random.uniform(0, 1), random.uniform(0, 1),
                                                                      random.uniform(0, 1)))
                        continue
                    if prop_name == 'scale':
                        attr_randomization_params = prop_attrs
                        sample = generate_random_samples(attr_randomization_params, 1,
                                                         self.last_step, None)
                        og_scale = 1
                        if attr_randomization_params['operation'] == 'scaling':
                            new_scale = og_scale * sample
                        elif attr_randomization_params['operation'] == 'additive':
                            new_scale = og_scale + sample
                        self.gym.set_actor_scale(env, handle, new_scale)
                        continue

                    prop = param_getters_map[prop_name](env, handle)
                    if isinstance(prop, list):
                        # Randomize relative to the properties captured on the
                        # first call, so noise does not compound over resets.
                        if self.first_randomization:
                            self.original_props[prop_name] = [
                                {attr: getattr(p, attr) for attr in dir(p)} for p in prop]
                        for p, og_p in zip(prop, self.original_props[prop_name]):
                            for attr, attr_randomization_params in prop_attrs.items():
                                smpl = None
                                if self.actor_params_generator is not None:
                                    smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                        extern_sample, extern_offsets[env_id], p, attr)
                                apply_random_samples(
                                    p, og_p, attr, attr_randomization_params,
                                    self.last_step, smpl)
                    else:
                        if self.first_randomization:
                            self.original_props[prop_name] = deepcopy(prop)
                        for attr, attr_randomization_params in prop_attrs.items():
                            smpl = None
                            if self.actor_params_generator is not None:
                                smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                    extern_sample, extern_offsets[env_id], prop, attr)
                            apply_random_samples(
                                prop, self.original_props[prop_name], attr,
                                attr_randomization_params, self.last_step, smpl)

                    setter = param_setters_map[prop_name]
                    default_args = param_setter_defaults_map[prop_name]
                    setter(env, handle, prop, *default_args)

        if self.actor_params_generator is not None:
            for env_id in env_ids:  # check that we used all dims in sample
                if extern_offsets[env_id] > 0:
                    extern_sample = self.extern_actor_params[env_id]
                    if extern_offsets[env_id] != extern_sample.shape[0]:
                        print('env_id', env_id,
                              'extern_offset', extern_offsets[env_id],
                              'vs extern_sample.shape', extern_sample.shape)
                        raise Exception("Invalid extern_sample size")

        self.first_randomization = False

    def pre_physics_step(self, actions):
        # Subclasses apply actions here before the physics substeps.
        raise NotImplementedError

    def _physics_step(self):
        """Run control_freq_inv physics substeps, rendering between them."""
        for i in range(self.control_freq_inv):
            self.render()
            self.gym.simulate(self.sim)
        return

    def post_physics_step(self):
        # Subclasses compute observations, rewards and resets here.
        raise NotImplementedError


def get_attr_val_from_sample(sample, offset, prop, attr):
    """Retrieves param value for the given prop and attr from the sample."""
    if sample is None:
        return None, 0
    if isinstance(prop, np.ndarray):
        # Array-valued attribute: consume one sample entry per element.
        smpl = sample[offset:offset+prop[attr].shape[0]]
        return smpl, offset+prop[attr].shape[0]
    else:
        return sample[offset], offset+1
Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np import os import torch from isaacgym import gymtorch from isaacgym import gymapi from isaacgym.torch_utils import * from timechamber.utils import torch_utils from timechamber.utils.utils import print_actor_info, print_asset_info from timechamber.tasks.ase_humanoid_base.base_task import BaseTask class Humanoid(BaseTask): def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): self.cfg = cfg self.sim_params = sim_params self.physics_engine = physics_engine ## self.borderline_space = self.cfg["env"]["borderlineSpace"] self.num_agents = self.cfg["env"].get("numAgents", 1) self._pd_control = self.cfg["env"]["pdControl"] self.power_scale = self.cfg["env"]["powerScale"] self.debug_viz = self.cfg["env"]["enableDebugVis"] self.plane_static_friction = self.cfg["env"]["plane"]["staticFriction"] self.plane_dynamic_friction = self.cfg["env"]["plane"]["dynamicFriction"] self.plane_restitution = self.cfg["env"]["plane"]["restitution"] self.max_episode_length = 
self.cfg["env"]["episodeLength"] self._local_root_obs = self.cfg["env"]["localRootObs"] self._root_height_obs = self.cfg["env"].get("rootHeightObs", True) self._enable_early_termination = self.cfg["env"]["enableEarlyTermination"] key_bodies = self.cfg["env"]["keyBodies"] self._setup_character_props(key_bodies) self.cfg["env"]["numObservations"] = self.get_obs_size() self.cfg["env"]["numActions"] = self.get_action_size() self.cfg["device_type"] = device_type self.cfg["device_id"] = device_id self.cfg["headless"] = headless super().__init__(cfg=self.cfg) self.dt = self.control_freq_inv * sim_params.dt # get gym GPU state tensors actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim) dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) # print(f"dof_state_tensor shape: {dof_state_tensor.shape}") sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim) rigid_body_state = self.gym.acquire_rigid_body_state_tensor(self.sim) contact_force_tensor = self.gym.acquire_net_contact_force_tensor(self.sim) sensors_per_env = 2 self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs * self.num_agents, sensors_per_env * 6) dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim) self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs * self.num_agents, self.num_dof) self.gym.refresh_dof_state_tensor(self.sim) self.gym.refresh_actor_root_state_tensor(self.sim) self.gym.refresh_rigid_body_state_tensor(self.sim) self.gym.refresh_net_contact_force_tensor(self.sim) self._root_states = gymtorch.wrap_tensor(actor_root_state) # print(f'root_states:{self._root_states.shape}') num_actors = self.get_num_actors_per_env() # print(f"num actors: {num_actors}") self._humanoid_root_states = self._root_states # print(f"humanoid_root_states shape: {self._humanoid_root_states.shape}") # (num_envs*2, 13) self._initial_humanoid_root_states = self._humanoid_root_states.clone() self._initial_humanoid_root_states[:, 7:13] 
= 0 # zero for linear vel and angular vel self._humanoid_actor_ids = num_actors * torch.arange(self.num_envs, device=self.device, dtype=torch.int32) # print(f"humanoid_actor_ids: {self._humanoid_actor_ids}") # 0, 2, 4, 6... # print(f"humanoid indices: {self.humanoid_indices}") # 0, 2, 4, 6... # print(f"humanooid op indices: {self.humanoid_indices_op}") # 1, 3, 5, 7... # create some wrapper tensors for different slices self._dof_state = gymtorch.wrap_tensor(dof_state_tensor) dofs_per_env = self._dof_state.shape[0] // self.num_envs self._dof_pos = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., :self.num_dof, 0] self._dof_vel = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., :self.num_dof, 1] # op self._dof_pos_op = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., self.num_dof:, 0] self._dof_vel_op = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., self.num_dof:, 1] self._initial_dof_pos = torch.zeros_like(self._dof_pos, device=self.device, dtype=torch.float) self._initial_dof_vel = torch.zeros_like(self._dof_vel, device=self.device, dtype=torch.float) # op self._initial_dof_pos_op = torch.zeros_like(self._dof_pos, device=self.device, dtype=torch.float) self._initial_dof_vel_op = torch.zeros_like(self._dof_vel, device=self.device, dtype=torch.float) self._rigid_body_state = gymtorch.wrap_tensor(rigid_body_state) bodies_per_env = self._rigid_body_state.shape[0] // self.num_envs rigid_body_state_reshaped = self._rigid_body_state.view(self.num_envs, bodies_per_env, 13) self._rigid_body_pos = rigid_body_state_reshaped[..., :self.num_bodies, 0:3] self._rigid_body_rot = rigid_body_state_reshaped[..., :self.num_bodies, 3:7] self._rigid_body_vel = rigid_body_state_reshaped[..., :self.num_bodies, 7:10] self._rigid_body_ang_vel = rigid_body_state_reshaped[..., :self.num_bodies, 10:13] # op self._rigid_body_pos_op = rigid_body_state_reshaped[..., self.num_bodies:, 0:3] self._rigid_body_rot_op = rigid_body_state_reshaped[..., 
self.num_bodies:, 3:7]
        # --- tail of __init__: remaining sim-tensor views and buffers ---
        # Opponent humanoid's per-body velocities (second half of each env's
        # rigid-body slice; the ego character occupies the first num_bodies rows).
        self._rigid_body_vel_op = rigid_body_state_reshaped[..., self.num_bodies:, 7:10]
        self._rigid_body_ang_vel_op = rigid_body_state_reshaped[..., self.num_bodies:, 10:13]

        # Net contact forces (xyz per body), split into ego / opponent halves.
        contact_force_tensor = gymtorch.wrap_tensor(contact_force_tensor)
        self._contact_forces = contact_force_tensor.view(self.num_envs, bodies_per_env, 3)[..., :self.num_bodies, :]
        self._contact_forces_op = contact_force_tensor.view(self.num_envs, bodies_per_env, 3)[..., self.num_bodies:, :]

        # Per-env early-termination flag; cleared in _reset_env_tensors().
        self._terminate_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long)

        self._build_termination_heights()

        contact_bodies = self.cfg["env"]["contactBodies"]
        self._key_body_ids = self._build_key_body_ids_tensor(key_bodies)
        self._contact_body_ids = self._build_contact_body_ids_tensor(contact_bodies)

        self.allocate_buffers()

        return

    def get_obs_size(self):
        """Return the per-agent observation width (set by _setup_character_props)."""
        return self._num_obs

    def get_action_size(self):
        """Return the per-agent action width (set by _setup_character_props)."""
        return self._num_actions

    def get_num_actors_per_env(self):
        """Return how many sim actors each env holds (two humanoids here)."""
        num_actors = self._root_states.shape[0] // self.num_envs
        return num_actors

    def _add_circle_borderline(self, env):
        """Draw a red circular arena border in the viewer for `env`.

        Builds 20 stacked rings of 360 one-degree segments; unit-circle points
        are scaled by self.borderline_space (the arena radius).
        """
        lines = []
        borderline_height = 0.01
        for height in range(20):
            for angle in range(360):
                begin_point = [np.cos(np.radians(angle)), np.sin(np.radians(angle)), borderline_height * height]
                end_point = [np.cos(np.radians(angle + 1)), np.sin(np.radians(angle + 1)), borderline_height * height]
                lines.append(begin_point)
                lines.append(end_point)
        lines = np.array(lines, dtype=np.float32) * self.borderline_space
        colors = np.array([[1, 0, 0]] * int(len(lines) / 2), dtype=np.float32)
        self.gym.add_lines(self.viewer, env, int(len(lines) / 2), lines, colors)

    def _add_rectangle_borderline(self, env):
        """Draw a red square arena border in the viewer for `env`.

        Four height bands (k) of ten lines each; every side of the square is
        split into 90 segments, with half-extent self.borderline_space.
        """
        lines = []
        # NOTE: this `colors` is overwritten below before use.
        colors = np.zeros((90*60, 3), dtype=np.float32)
        for k in range(4):
            for height in range(10):
                lines1 = []
                lines2 = []
                lines3 = []
                lines4 = []
                for i in range(90):
                    # Four sides of the square at z = height*0.01 + k*0.25.
                    begin_point1 = [-self.borderline_space + i * self.borderline_space / 45, self.borderline_space, height*0.01+ k*0.25]
                    end_point1 = [-self.borderline_space + (i+1) * self.borderline_space / 45, self.borderline_space, height*0.01+ k*0.25]
                    begin_point2 = [self.borderline_space, self.borderline_space - i * self.borderline_space / 45, height*0.01+ k*0.25]
                    end_point2 = [self.borderline_space, self.borderline_space - (i+1) * self.borderline_space / 45, height*0.01+ k*0.25]
                    begin_point3 = [self.borderline_space - i * self.borderline_space / 45, -self.borderline_space, height*0.01+ k*0.25]
                    end_point3 = [self.borderline_space - (i+1) * self.borderline_space / 45, -self.borderline_space, height*0.01+ k*0.25]
                    begin_point4 = [-self.borderline_space , -self.borderline_space + i * self.borderline_space / 45, height*0.01+ k*0.25]
                    end_point4 = [-self.borderline_space, -self.borderline_space + (i+1) * self.borderline_space / 45, height*0.01+ k*0.25]
                    lines1.append(begin_point1)
                    lines1.append(end_point1)
                    lines2.append(begin_point2)
                    lines2.append(end_point2)
                    lines3.append(begin_point3)
                    lines3.append(end_point3)
                    lines4.append(begin_point4)
                    lines4.append(end_point4)
                lines.extend(lines1)
                lines.extend(lines2)
                lines.extend(lines3)
                lines.extend(lines4)
        lines = np.array(lines, dtype=np.float32)
        colors = np.array([[1, 0, 0]] * int(len(lines) / 2), dtype=np.float32)
        self.gym.add_lines(self.viewer, env, int(len(lines) / 2), lines, colors)

    def allocate_buffers(self):
        """Allocate the per-step RL buffers (obs/state/reward/reset/progress/extras)."""
        # Observations are stacked agent-major: rows [0, num_envs) are the ego
        # agent, rows [num_envs, 2*num_envs) the opponent (see _compute_observations).
        self.obs_buf = torch.zeros((self.num_agents * self.num_envs, self.num_obs), device=self.device, dtype=torch.float)
        self.states_buf = torch.zeros(
            (self.num_envs, self.num_states), device=self.device, dtype=torch.float)
        self.rew_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.float)
        self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long)
        self.timeout_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.progress_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.randomize_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        # Match-outcome flags, one slot per non-ego agent per env.
        self.extras = {
            'win': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool),
            'lose': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool),
            'draw': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool)}
        # World-axis unit vectors, repeated for both agents of every env.
        self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1))
        self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1))
        self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1))

    def create_sim(self):
        """Create the simulator, ground plane, and all envs."""
        self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, 'z')
        self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)

        self._create_ground_plane()
        self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs)))
        return

    def reset(self, env_ids=None):
        """Reset the given envs (all envs when env_ids is None)."""
        if (env_ids is None):
            env_ids = to_torch(np.arange(self.num_envs), device=self.device, dtype=torch.long)
        self._reset_envs(env_ids)
        return

    def set_char_color(self, col, env_ids):
        """Tint every rigid body of the ego humanoid in `env_ids` with RGB `col`."""
        for env_id in env_ids:
            env_ptr = self.envs[env_id]
            handle = self.humanoid_handles[env_id]

            for j in range(self.num_bodies):
                self.gym.set_rigid_body_color(env_ptr, handle, j, gymapi.MESH_VISUAL,
                                              gymapi.Vec3(col[0], col[1], col[2]))
        return

    def _reset_envs(self, env_ids):
        """Reset actors and sim tensors for `env_ids`, then refresh and re-observe."""
        if (len(env_ids) > 0):
            self._reset_actors(env_ids)
            self._reset_env_tensors(env_ids)
            self._refresh_sim_tensors()
            self._compute_observations()
        return

    def _reset_env_tensors(self, env_ids):
        """Push the already-reset root/DOF state of both humanoids into the sim."""
        # env_ids_int32 = self._humanoid_actor_ids[env_ids]
        # Both the ego and the opponent actor index of each env are reset together.
        env_ids_int32 = (torch.cat((self.humanoid_indices[env_ids], self.humanoid_indices_op[env_ids]))).to(dtype=torch.int32)
        self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states),
                                                     gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
        self.gym.set_dof_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self._dof_state), gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) self.progress_buf[env_ids] = 0 self.reset_buf[env_ids] = 0 self._terminate_buf[env_ids] = 0 return def _create_ground_plane(self): plane_params = gymapi.PlaneParams() plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0) plane_params.static_friction = self.plane_static_friction plane_params.dynamic_friction = self.plane_dynamic_friction plane_params.restitution = self.plane_restitution self.gym.add_ground(self.sim, plane_params) return def _setup_character_props(self, key_bodies): asset_file = self.cfg["env"]["asset"]["assetFileName"] num_key_bodies = len(key_bodies) if (asset_file == "mjcf/amp_humanoid.xml"): self._dof_body_ids = [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14] self._dof_offsets = [0, 3, 6, 9, 10, 13, 14, 17, 18, 21, 24, 25, 28] self._dof_obs_size = 72 self._num_actions = 28 self._num_obs = 1 + 15 * (3 + 6 + 3 + 3) - 3 elif (asset_file == "mjcf/amp_humanoid_sword_shield.xml"): self._dof_body_ids = [1, 2, 3, 4, 5, 7, 8, 11, 12, 13, 14, 15, 16] self._dof_offsets = [0, 3, 6, 9, 10, 13, 16, 17, 20, 21, 24, 27, 28, 31] self._dof_obs_size = 78 self._num_actions = 31 self._num_obs = 1 + 17 * (3 + 6 + 3 + 3) - 3 else: print("Unsupported character config file: {s}".format(asset_file)) assert(False) return def _build_termination_heights(self): head_term_height = 0.3 shield_term_height = 0.32 termination_height = self.cfg["env"]["terminationHeight"] self._termination_heights = np.array([termination_height] * self.num_bodies) head_id = self.gym.find_actor_rigid_body_handle(self.envs[0], self.humanoid_handles[0], "head") self._termination_heights[head_id] = max(head_term_height, self._termination_heights[head_id]) asset_file = self.cfg["env"]["asset"]["assetFileName"] if (asset_file == "mjcf/amp_humanoid_sword_shield.xml"): left_arm_id = self.gym.find_actor_rigid_body_handle(self.envs[0], self.humanoid_handles[0], "left_lower_arm") self._termination_heights[left_arm_id] = 
max(shield_term_height, self._termination_heights[left_arm_id]) self._termination_heights = to_torch(self._termination_heights, device=self.device) return def _create_envs(self, num_envs, spacing, num_per_row): lower = gymapi.Vec3(-spacing, -spacing, 0.0) upper = gymapi.Vec3(spacing, spacing, spacing) asset_root = self.cfg["env"]["asset"]["assetRoot"] asset_file = self.cfg["env"]["asset"]["assetFileName"] asset_path = os.path.join(asset_root, asset_file) asset_root = os.path.dirname(asset_path) asset_file = os.path.basename(asset_path) asset_options = gymapi.AssetOptions() asset_options.angular_damping = 0.01 asset_options.max_angular_velocity = 100.0 asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE #asset_options.fix_base_link = True humanoid_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) humanoid_asset_op = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) actuator_props = self.gym.get_asset_actuator_properties(humanoid_asset) motor_efforts = [prop.motor_effort for prop in actuator_props] # create force sensors at the feet right_foot_idx = self.gym.find_asset_rigid_body_index(humanoid_asset, "right_foot") left_foot_idx = self.gym.find_asset_rigid_body_index(humanoid_asset, "left_foot") # op right_foot_idx_op = self.gym.find_asset_rigid_body_index(humanoid_asset_op, "right_foot") left_foot_idx_op = self.gym.find_asset_rigid_body_index(humanoid_asset_op, "left_foot") sensor_pose = gymapi.Transform() sensor_pose_op = gymapi.Transform() self.gym.create_asset_force_sensor(humanoid_asset, right_foot_idx, sensor_pose) self.gym.create_asset_force_sensor(humanoid_asset, left_foot_idx, sensor_pose) # op self.gym.create_asset_force_sensor(humanoid_asset_op, right_foot_idx_op, sensor_pose_op) self.gym.create_asset_force_sensor(humanoid_asset_op, left_foot_idx_op, sensor_pose_op) self.max_motor_effort = max(motor_efforts) self.motor_efforts = to_torch(motor_efforts, device=self.device) self.torso_index = 0 # 17 
bodies self.num_bodies = self.gym.get_asset_rigid_body_count(humanoid_asset) # 31 dofs self.num_dof = self.gym.get_asset_dof_count(humanoid_asset) # 34 joints self.num_joints = self.gym.get_asset_joint_count(humanoid_asset) self.humanoid_handles = [] self.humanoid_handles_op = [] self.humanoid_indices = [] self.humanoid_indices_op = [] self.envs = [] self.dof_limits_lower = [] self.dof_limits_upper = [] for i in range(self.num_envs): # create env instance env_ptr = self.gym.create_env(self.sim, lower, upper, num_per_row) self._build_env(i, env_ptr, humanoid_asset, humanoid_asset_op) self.envs.append(env_ptr) dof_prop = self.gym.get_actor_dof_properties(self.envs[0], self.humanoid_handles[0]) for j in range(self.num_dof): if dof_prop['lower'][j] > dof_prop['upper'][j]: self.dof_limits_lower.append(dof_prop['upper'][j]) self.dof_limits_upper.append(dof_prop['lower'][j]) else: self.dof_limits_lower.append(dof_prop['lower'][j]) self.dof_limits_upper.append(dof_prop['upper'][j]) self.dof_limits_lower = to_torch(self.dof_limits_lower, device=self.device) self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device) self.humanoid_indices = to_torch(self.humanoid_indices, dtype=torch.long, device=self.device) self.humanoid_indices_op = to_torch(self.humanoid_indices_op, dtype=torch.long, device=self.device) if (self._pd_control): self._build_pd_action_offset_scale() return def _build_env(self, env_id, env_ptr, humanoid_asset, humanoid_asset_op): col_group = env_id col_filter = self._get_humanoid_collision_filter() segmentation_id = 0 start_pose = gymapi.Transform() start_pose_op = gymapi.Transform() # asset_file = self.cfg["env"]["asset"]["assetFileName"] # char_h = 0.89 start_pose.p = gymapi.Vec3(-self.borderline_space + 2, -self.borderline_space + 2, 0.89) start_pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1.0) start_pose_op.p = gymapi.Vec3(self.borderline_space - 2, self.borderline_space - 2, 0.89) # start_pose_op.p = gymapi.Vec3(0, 0, 0.89) start_pose_op.r = 
gymapi.Quat(0.0, 0.0, 0.0, 1.0) humanoid_handle = self.gym.create_actor(env_ptr, humanoid_asset, start_pose, "humanoid", col_group, col_filter, segmentation_id) humanoid_index = self.gym.get_actor_index(env_ptr, humanoid_handle, gymapi.DOMAIN_SIM) humanoid_handle_op = self.gym.create_actor(env_ptr, humanoid_asset_op, start_pose_op, "humanoid", col_group, col_filter, segmentation_id) humanoid_index_op = self.gym.get_actor_index(env_ptr, humanoid_handle_op, gymapi.DOMAIN_SIM) self.gym.enable_actor_dof_force_sensors(env_ptr, humanoid_handle) self.gym.enable_actor_dof_force_sensors(env_ptr, humanoid_handle_op) for j in range(self.num_bodies): self.gym.set_rigid_body_color(env_ptr, humanoid_handle, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.54, 0.85, 0.2)) self.gym.set_rigid_body_color(env_ptr, humanoid_handle_op, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.97, 0.38, 0.06)) if (self._pd_control): dof_prop = self.gym.get_asset_dof_properties(humanoid_asset) dof_prop["driveMode"] = gymapi.DOF_MODE_POS self.gym.set_actor_dof_properties(env_ptr, humanoid_handle, dof_prop) dof_prop_op = self.gym.get_asset_dof_properties(humanoid_asset_op) dof_prop_op["driveMode"] = gymapi.DOF_MODE_POS self.gym.set_actor_dof_properties(env_ptr, humanoid_handle_op, dof_prop_op) self.humanoid_handles.append(humanoid_handle) self.humanoid_indices.append(humanoid_index) self.humanoid_handles_op.append(humanoid_handle_op) self.humanoid_indices_op.append(humanoid_index_op) return def _build_pd_action_offset_scale(self): num_joints = len(self._dof_offsets) - 1 lim_low = self.dof_limits_lower.cpu().numpy() lim_high = self.dof_limits_upper.cpu().numpy() for j in range(num_joints): dof_offset = self._dof_offsets[j] dof_size = self._dof_offsets[j + 1] - self._dof_offsets[j] if (dof_size == 3): curr_low = lim_low[dof_offset:(dof_offset + dof_size)] curr_high = lim_high[dof_offset:(dof_offset + dof_size)] curr_low = np.max(np.abs(curr_low)) curr_high = np.max(np.abs(curr_high)) curr_scale = max([curr_low, curr_high]) 
# --- continuation of _build_pd_action_offset_scale ---
                # Widen by 20% but never beyond pi.
                curr_scale = 1.2 * curr_scale
                curr_scale = min([curr_scale, np.pi])

                lim_low[dof_offset:(dof_offset + dof_size)] = -curr_scale
                lim_high[dof_offset:(dof_offset + dof_size)] = curr_scale

                #lim_low[dof_offset:(dof_offset + dof_size)] = -np.pi
                #lim_high[dof_offset:(dof_offset + dof_size)] = np.pi

            elif (dof_size == 1):
                curr_low = lim_low[dof_offset]
                curr_high = lim_high[dof_offset]
                curr_mid = 0.5 * (curr_high + curr_low)

                # extend the action range to be a bit beyond the joint limits so that the motors
                # don't lose their strength as they approach the joint limits
                curr_scale = 0.7 * (curr_high - curr_low)
                curr_low = curr_mid - curr_scale
                curr_high = curr_mid + curr_scale

                lim_low[dof_offset] = curr_low
                lim_high[dof_offset] = curr_high

        self._pd_action_offset = 0.5 * (lim_high + lim_low)
        self._pd_action_scale = 0.5 * (lim_high - lim_low)
        self._pd_action_offset = to_torch(self._pd_action_offset, device=self.device)
        self._pd_action_scale = to_torch(self._pd_action_scale, device=self.device)

        return

    def _get_humanoid_collision_filter(self):
        """Collision filter bitmask for humanoid actors (0 = collide with everything)."""
        return 0

    def _compute_reward(self, actions):
        """Fill rew_buf; this base task hands out a constant alive reward."""
        self.rew_buf[:] = compute_humanoid_reward(self.obs_buf)
        return

    def _compute_reset(self):
        """Fill reset/terminate buffers from fall detection and episode timeout."""
        self.reset_buf[:], self._terminate_buf[:] = compute_humanoid_reset(self.reset_buf, self.progress_buf,
                                                                           self._contact_forces, self._contact_body_ids,
                                                                           self._rigid_body_pos, self.max_episode_length,
                                                                           self._enable_early_termination,
                                                                           self._termination_heights)
        return

    def _refresh_sim_tensors(self):
        """Pull the latest simulator state into the wrapped GPU tensors."""
        self.gym.refresh_dof_state_tensor(self.sim)
        self.gym.refresh_actor_root_state_tensor(self.sim)
        self.gym.refresh_rigid_body_state_tensor(self.sim)

        self.gym.refresh_force_sensor_tensor(self.sim)
        self.gym.refresh_dof_force_tensor(self.sim)
        self.gym.refresh_net_contact_force_tensor(self.sim)
        return

    def _compute_observations(self):
        """Write ego obs into rows [0, num_envs) and opponent obs into [num_envs, 2*num_envs)."""
        obs, obs_op = self._compute_humanoid_obs()
        self.obs_buf[:self.num_envs] = obs
        self.obs_buf[self.num_envs:] = obs_op
        return

    def _compute_humanoid_obs(self):
        """Build the max-coordinate observation for both characters."""
        body_pos = self._rigid_body_pos
        body_rot = self._rigid_body_rot
        body_vel = self._rigid_body_vel
        body_ang_vel = self._rigid_body_ang_vel
        body_pos_op = self._rigid_body_pos_op
        body_rot_op = self._rigid_body_rot_op
        body_vel_op = self._rigid_body_vel_op
        body_ang_vel_op = self._rigid_body_ang_vel_op
        obs = compute_humanoid_observations_max(body_pos, body_rot, body_vel, body_ang_vel,
                                                self._local_root_obs, self._root_height_obs)
        obs_op = compute_humanoid_observations_max(body_pos_op, body_rot_op, body_vel_op, body_ang_vel_op,
                                                   self._local_root_obs, self._root_height_obs)
        return obs, obs_op

    def _reset_actors(self, env_ids):
        """Restore the initial root and DOF state for both agents of the given envs."""
        # Root states are indexed per-actor (2 actors per env), DOF views per-env.
        agent_env_ids = expand_env_ids(env_ids, 2)
        self._humanoid_root_states[agent_env_ids] = self._initial_humanoid_root_states[agent_env_ids]
        self._dof_pos[env_ids] = self._initial_dof_pos[env_ids]
        self._dof_vel[env_ids] = self._initial_dof_vel[env_ids]
        self._dof_pos_op[env_ids] = self._initial_dof_pos_op[env_ids]
        self._dof_vel_op[env_ids] = self._initial_dof_vel_op[env_ids]
        return

    def pre_physics_step(self, actions):
        """Apply the stacked actions (ego rows first, then opponent) as PD targets or torques."""
        self.actions = actions.to(self.device).clone()
        ego_actions = self.actions[:self.num_envs]
        op_actions = self.actions[self.num_envs:]
        if (self._pd_control):
            pd_tar_ego = self._action_to_pd_targets(ego_actions)
            pd_tar_op = self._action_to_pd_targets(op_actions)
            # Concatenate along the DOF axis: each env's DOF slice holds both actors.
            pd_tar = torch.cat([pd_tar_ego, pd_tar_op], dim=-1)
            pd_tar_tensor = gymtorch.unwrap_tensor(pd_tar)
            self.gym.set_dof_position_target_tensor(self.sim, pd_tar_tensor)
        else:
            forces = self.actions * self.motor_efforts.unsqueeze(0) * self.power_scale
            force_tensor = gymtorch.unwrap_tensor(forces)
            self.gym.set_dof_actuation_force_tensor(self.sim, force_tensor)
        return

    def post_physics_step(self):
        """Advance bookkeeping after simulation: refresh tensors, obs, reward, resets."""
        self.progress_buf += 1

        self._refresh_sim_tensors()
        self._compute_observations()
        self._compute_reward(self.actions)
        self._compute_reset()

        self.extras["terminate"] = self._terminate_buf

        # debug viz
        if self.viewer and self.debug_viz:
            self._update_debug_viz()

        return

    def render(self, sync_frame_time=False):
        super().render(sync_frame_time)
        return

    def _build_key_body_ids_tensor(self, key_body_names):
        """Map key-body names to rigid-body handles on the ego actor."""
        env_ptr = self.envs[0]
        actor_handle = self.humanoid_handles[0]
        body_ids = []

        for body_name in key_body_names:
            body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name)
            assert(body_id != -1)
            body_ids.append(body_id)

        body_ids = to_torch(body_ids, device=self.device, dtype=torch.long)
        return body_ids

    def _build_contact_body_ids_tensor(self, contact_body_names):
        """Map allowed-contact body names to rigid-body handles on the ego actor."""
        env_ptr = self.envs[0]
        actor_handle = self.humanoid_handles[0]
        body_ids = []

        for body_name in contact_body_names:
            body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name)
            assert(body_id != -1)
            body_ids.append(body_id)

        body_ids = to_torch(body_ids, device=self.device, dtype=torch.long)
        return body_ids

    def _action_to_pd_targets(self, action):
        """Map normalized actions to PD position targets via the precomputed offset/scale."""
        pd_tar = self._pd_action_offset + self._pd_action_scale * action
        return pd_tar

    def _update_debug_viz(self):
        self.gym.clear_lines(self.viewer)
        return


#####################################################################
###=========================jit functions=========================###
#####################################################################

@torch.jit.script
def dof_to_obs(pose, dof_obs_size, dof_offsets):
    # type: (Tensor, int, List[int]) -> Tensor
    # Encode DOF positions as one 6D tangent/normal rotation per joint.
    joint_obs_size = 6
    num_joints = len(dof_offsets) - 1

    dof_obs_shape = pose.shape[:-1] + (dof_obs_size,)
    dof_obs = torch.zeros(dof_obs_shape, device=pose.device)

    dof_obs_offset = 0  # NOTE: unused
    for j in range(num_joints):
        dof_offset = dof_offsets[j]
        dof_size = dof_offsets[j + 1] - dof_offsets[j]
        joint_pose = pose[:, dof_offset:(dof_offset + dof_size)]

        # assume this is a spherical joint
        if (dof_size == 3):
            joint_pose_q = torch_utils.exp_map_to_quat(joint_pose)
        elif (dof_size == 1):
            # 1-DOF joints rotate about the local y axis in these MJCF models.
            axis = torch.tensor([0.0, 1.0, 0.0], dtype=joint_pose.dtype, device=pose.device)
            joint_pose_q = quat_from_angle_axis(joint_pose[..., 0], axis)
        else:
            joint_pose_q = None
            assert(False), "Unsupported joint type"

        joint_dof_obs = torch_utils.quat_to_tan_norm(joint_pose_q)
        dof_obs[:, (j * joint_obs_size):((j + 1) * joint_obs_size)] = joint_dof_obs

    assert((num_joints * joint_obs_size) == dof_obs_size)

    return dof_obs


@torch.jit.script
def compute_humanoid_observations(root_pos, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos,
                                  local_root_obs, root_height_obs, dof_obs_size, dof_offsets):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, bool, bool, int, List[int]) -> Tensor
    # Reduced-coordinate observation: root height/rot/vels + DOF encoding + key-body positions,
    # all expressed in the root's heading frame.
    root_h = root_pos[:, 2:3]
    heading_rot = torch_utils.calc_heading_quat_inv(root_rot)

    if (local_root_obs):
        root_rot_obs = quat_mul(heading_rot, root_rot)
    else:
        root_rot_obs = root_rot
    root_rot_obs = torch_utils.quat_to_tan_norm(root_rot_obs)

    if (not root_height_obs):
        root_h_obs = torch.zeros_like(root_h)
    else:
        root_h_obs = root_h

    local_root_vel = quat_rotate(heading_rot, root_vel)
    local_root_ang_vel = quat_rotate(heading_rot, root_ang_vel)

    root_pos_expand = root_pos.unsqueeze(-2)
    local_key_body_pos = key_body_pos - root_pos_expand

    heading_rot_expand = heading_rot.unsqueeze(-2)
    heading_rot_expand = heading_rot_expand.repeat((1, local_key_body_pos.shape[1], 1))
    flat_end_pos = local_key_body_pos.view(local_key_body_pos.shape[0] * local_key_body_pos.shape[1],
                                           local_key_body_pos.shape[2])
    flat_heading_rot = heading_rot_expand.view(heading_rot_expand.shape[0] * heading_rot_expand.shape[1],
                                               heading_rot_expand.shape[2])
    local_end_pos = quat_rotate(flat_heading_rot, flat_end_pos)
    flat_local_key_pos = local_end_pos.view(local_key_body_pos.shape[0],
                                            local_key_body_pos.shape[1] * local_key_body_pos.shape[2])

    dof_obs = dof_to_obs(dof_pos, dof_obs_size, dof_offsets)

    obs = torch.cat((root_h_obs, root_rot_obs, local_root_vel, local_root_ang_vel, dof_obs, dof_vel,
                     flat_local_key_pos), dim=-1)
    return obs


@torch.jit.script
def compute_humanoid_observations_max(body_pos, body_rot, body_vel, body_ang_vel, local_root_obs, root_height_obs):
    # type: (Tensor, Tensor, Tensor, Tensor, bool, bool) -> Tensor
    # Max-coordinate observation: every body's pos/rot/vel/ang-vel expressed in
    # the root's heading frame, plus the root height.
    root_pos = body_pos[:, 0, :]  # 0: pelvis, root
    root_rot = body_rot[:, 0, :]

    root_h = root_pos[:, 2:3]  # 1. Height of the root from the ground
    heading_rot = torch_utils.calc_heading_quat_inv(root_rot)

    if (not root_height_obs):
        root_h_obs = torch.zeros_like(root_h)
    else:
        root_h_obs = root_h

    heading_rot_expand = heading_rot.unsqueeze(-2)  # num_envs, 1, 4
    # num_envs, body_pos.shape[1], 4
    heading_rot_expand = heading_rot_expand.repeat((1, body_pos.shape[1], 1))
    flat_heading_rot = heading_rot_expand.reshape(heading_rot_expand.shape[0] * heading_rot_expand.shape[1],
                                                  heading_rot_expand.shape[2])

    root_pos_expand = root_pos.unsqueeze(-2)
    local_body_pos = body_pos - root_pos_expand
    flat_local_body_pos = local_body_pos.reshape(local_body_pos.shape[0] * local_body_pos.shape[1],
                                                 local_body_pos.shape[2])
    flat_local_body_pos = quat_rotate(flat_heading_rot, flat_local_body_pos)
    local_body_pos = flat_local_body_pos.reshape(local_body_pos.shape[0],
                                                 local_body_pos.shape[1] * local_body_pos.shape[2])
    local_body_pos = local_body_pos[..., 3:]  # remove root pos

    flat_body_rot = body_rot.reshape(body_rot.shape[0] * body_rot.shape[1], body_rot.shape[2])
    flat_local_body_rot = quat_mul(flat_heading_rot, flat_body_rot)
    flat_local_body_rot_obs = torch_utils.quat_to_tan_norm(flat_local_body_rot)
    local_body_rot_obs = flat_local_body_rot_obs.reshape(body_rot.shape[0],
                                                         body_rot.shape[1] * flat_local_body_rot_obs.shape[1])

    if (local_root_obs):
        # Replace the root's rotation encoding with the un-headinged root rotation.
        root_rot_obs = torch_utils.quat_to_tan_norm(root_rot)
        local_body_rot_obs[..., 0:6] = root_rot_obs

    flat_body_vel = body_vel.reshape(body_vel.shape[0] * body_vel.shape[1], body_vel.shape[2])
    flat_local_body_vel = quat_rotate(flat_heading_rot, flat_body_vel)
    local_body_vel = flat_local_body_vel.reshape(body_vel.shape[0], body_vel.shape[1] * body_vel.shape[2])

    flat_body_ang_vel = body_ang_vel.reshape(body_ang_vel.shape[0] * body_ang_vel.shape[1], body_ang_vel.shape[2])
    flat_local_body_ang_vel = quat_rotate(flat_heading_rot, flat_body_ang_vel)
    local_body_ang_vel = flat_local_body_ang_vel.reshape(body_ang_vel.shape[0],
                                                         body_ang_vel.shape[1] * body_ang_vel.shape[2])

    obs = torch.cat((root_h_obs, local_body_pos, local_body_rot_obs, local_body_vel, local_body_ang_vel), dim=-1)
    return obs


@torch.jit.script
def expand_env_ids(env_ids, n_agents):
    # type: (Tensor, int) -> Tensor
    # Expand per-env ids into per-agent actor ids, interleaved so that
    # env e's agents occupy ids [e*n_agents, e*n_agents + n_agents).
    device = env_ids.device
    agent_env_ids = torch.zeros((n_agents * len(env_ids)), device=device, dtype=torch.long)
    for idx in range(n_agents):
        agent_env_ids[idx::n_agents] = env_ids * n_agents + idx
    return agent_env_ids


@torch.jit.script
def compute_humanoid_reward(obs_buf):
    # type: (Tensor) -> Tensor
    # Constant alive reward (1 per agent-env row).
    reward = torch.ones_like(obs_buf[:, 0])
    return reward


@torch.jit.script
def compute_humanoid_reset(reset_buf, progress_buf, contact_buf, contact_body_ids, rigid_body_pos,
                           max_episode_length, enable_early_termination, termination_heights):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, float, bool, Tensor) -> Tuple[Tensor, Tensor]
    # A fall requires BOTH a contact on a disallowed body AND some disallowed
    # body dropping below its termination height.
    terminated = torch.zeros_like(reset_buf)

    if (enable_early_termination):
        masked_contact_buf = contact_buf.clone()
        masked_contact_buf[:, contact_body_ids, :] = 0
        fall_contact = torch.any(torch.abs(masked_contact_buf) > 0.1, dim=-1)
        fall_contact = torch.any(fall_contact, dim=-1)

        body_height = rigid_body_pos[..., 2]
        fall_height = body_height < termination_heights
        fall_height[:, contact_body_ids] = False
        fall_height = torch.any(fall_height, dim=-1)

        has_fallen = torch.logical_and(fall_contact, fall_height)

        # first timestep can sometimes still have nonzero contact forces
        # so only check after first couple of steps
        has_fallen *= (progress_buf > 1)
        terminated = torch.where(has_fallen, torch.ones_like(reset_buf), terminated)

    reset = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(reset_buf), terminated)

    return reset, terminated


================================================ FILE: timechamber/tasks/ase_humanoid_base/humanoid_amp.py ================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
#
All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class HumanoidAMP(Humanoid):
    """Humanoid environment with AMP (Adversarial Motion Prior) observations.

    Extends Humanoid with a reference-motion library and a rolling buffer of
    the last `numAMPObsSteps` AMP observation frames per env; the flattened
    buffer is exposed to the discriminator via `extras["amp_obs"]`.
    """

    class StateInit(Enum):
        # Default: the base class's initial pose.
        # Start/Random: a reference-motion frame (t=0 / random t).
        # Hybrid: per-env Bernoulli mix of reference and default.
        Default = 0
        Start = 1
        Random = 2
        Hybrid = 3

    def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
        state_init = cfg["env"]["stateInit"]
        self._state_init = HumanoidAMP.StateInit[state_init]
        self._hybrid_init_prob = cfg["env"]["hybridInitProb"]
        self._num_amp_obs_steps = cfg["env"]["numAMPObsSteps"]
        assert(self._num_amp_obs_steps >= 2)

        self._reset_default_env_ids = []
        self._reset_ref_env_ids = []

        super().__init__(cfg=cfg,
                         sim_params=sim_params,
                         physics_engine=physics_engine,
                         device_type=device_type,
                         device_id=device_id,
                         headless=headless)

        motion_file = cfg['env']['motion_file']
        self._load_motion(motion_file)

        # Rolling AMP-observation window: slot 0 is the current frame, slots 1..
        # are history. The two views below alias this buffer, so writing them
        # writes the buffer in place.
        self._amp_obs_buf = torch.zeros((self.num_envs, self._num_amp_obs_steps, self._num_amp_obs_per_step),
                                        device=self.device, dtype=torch.float)
        self._curr_amp_obs_buf = self._amp_obs_buf[:, 0]
        self._hist_amp_obs_buf = self._amp_obs_buf[:, 1:]

        # Lazily sized on the first fetch_amp_obs_demo() call.
        self._amp_obs_demo_buf = None

        return

    def post_physics_step(self):
        """Advance the AMP window (shift history, fill current) and publish it."""
        super().post_physics_step()

        self._update_hist_amp_obs()
        self._compute_amp_observations()

        amp_obs_flat = self._amp_obs_buf.view(-1, self.get_num_amp_obs())
        self.extras["amp_obs"] = amp_obs_flat

        return

    def get_num_amp_obs(self):
        """Size of one flattened AMP observation (steps * features per step)."""
        return self._num_amp_obs_steps * self._num_amp_obs_per_step

    def fetch_amp_obs_demo(self, num_samples):
        """Sample `num_samples` AMP observation windows from reference motions.

        Returns a (num_samples, get_num_amp_obs()) tensor of expert demos for
        the discriminator. The demo buffer is sized on first use and its size
        must not change afterwards.
        """
        if (self._amp_obs_demo_buf is None):
            self._build_amp_obs_demo_buf(num_samples)
        else:
            assert(self._amp_obs_demo_buf.shape[0] == num_samples)

        motion_ids = self._motion_lib.sample_motions(num_samples)
        motion_times0 = self._motion_lib.sample_time(motion_ids)
        amp_obs_demo = self.build_amp_obs_demo(motion_ids, motion_times0)
        self._amp_obs_demo_buf[:] = amp_obs_demo.view(self._amp_obs_demo_buf.shape)
        amp_obs_demo_flat = self._amp_obs_demo_buf.view(-1, self.get_num_amp_obs())

        return amp_obs_demo_flat

    def build_amp_obs_demo(self, motion_ids, motion_times0):
        """Build AMP windows ending at `motion_times0` by stepping backwards in dt."""
        dt = self.dt

        motion_ids = torch.tile(motion_ids.unsqueeze(-1), [1, self._num_amp_obs_steps])
        motion_times = motion_times0.unsqueeze(-1)
        # Offsets 0, -dt, -2dt, ...: most recent frame first, matching _amp_obs_buf.
        time_steps = -dt * torch.arange(0, self._num_amp_obs_steps, device=self.device)
        motion_times = motion_times + time_steps

        motion_ids = motion_ids.view(-1)
        motion_times = motion_times.view(-1)
        root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \
            = self._motion_lib.get_motion_state(motion_ids, motion_times)
        amp_obs_demo = build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel,
                                              dof_pos, dof_vel, key_pos,
                                              self._local_root_obs, self._root_height_obs,
                                              self._dof_obs_size, self._dof_offsets)
        return amp_obs_demo

    def _build_amp_obs_demo_buf(self, num_samples):
        self._amp_obs_demo_buf = torch.zeros((num_samples, self._num_amp_obs_steps, self._num_amp_obs_per_step),
                                             device=self.device, dtype=torch.float32)
        return

    def _setup_character_props(self, key_bodies):
        """Set per-step AMP feature count for the known character assets."""
        super()._setup_character_props(key_bodies)

        asset_file = self.cfg["env"]["asset"]["assetFileName"]
        num_key_bodies = len(key_bodies)

        if (asset_file == "mjcf/amp_humanoid.xml"):
            self._num_amp_obs_per_step = 13 + self._dof_obs_size + 28 + 3 * num_key_bodies  # [root_h, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos]
        elif (asset_file == "mjcf/amp_humanoid_sword_shield.xml"):
            self._num_amp_obs_per_step = 13 + self._dof_obs_size + 31 + 3 * num_key_bodies  # [root_h, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos]
        else:
            # BUGFIX: "{s}" is an invalid format field and raised KeyError before
            # the assert could fire; use "{:s}" like the other messages in this file.
            print("Unsupported character config file: {:s}".format(asset_file))
            assert(False)

        return

    def _load_motion(self, motion_file):
        assert(self._dof_offsets[-1] == self.num_dof)
        self._motion_lib = MotionLib(motion_file=motion_file,
                                     dof_body_ids=self._dof_body_ids,
                                     dof_offsets=self._dof_offsets,
                                     key_body_ids=self._key_body_ids.cpu().numpy(),
                                     device=self.device)
        return

    def _reset_envs(self, env_ids):
        self._reset_default_env_ids = []
        self._reset_ref_env_ids = []

        super()._reset_envs(env_ids)
        self._init_amp_obs(env_ids)

        return

    def _reset_actors(self, env_ids):
        """Dispatch actor reset according to the configured StateInit mode."""
        if (self._state_init == HumanoidAMP.StateInit.Default):
            self._reset_default(env_ids)
        elif (self._state_init == HumanoidAMP.StateInit.Start
              or self._state_init == HumanoidAMP.StateInit.Random):
            self._reset_ref_state_init(env_ids)
        elif (self._state_init == HumanoidAMP.StateInit.Hybrid):
            self._reset_hybrid_state_init(env_ids)
        else:
            assert(False), "Unsupported state initialization strategy: {:s}".format(str(self._state_init))
        return

    def _reset_default(self, env_ids):
        super()._reset_actors(env_ids)
        # NOTE(review): the original bookkeeping below is deliberately disabled,
        # so default-reset envs never take the _init_amp_obs_default path —
        # confirm this is intended before re-enabling.
        # self._humanoid_root_states[env_ids] = self._initial_humanoid_root_states[env_ids]
        # self._dof_pos[env_ids] = self._initial_dof_pos[env_ids]
        # self._dof_vel[env_ids] = self._initial_dof_vel[env_ids]
        # self._reset_default_env_ids = env_ids
        return

    def _reset_ref_state_init(self, env_ids):
        """Reset `env_ids` to states sampled from the reference motion library."""
        num_envs = env_ids.shape[0]
        motion_ids = self._motion_lib.sample_motions(num_envs)

        if (self._state_init == HumanoidAMP.StateInit.Random
                or self._state_init == HumanoidAMP.StateInit.Hybrid):
            motion_times = self._motion_lib.sample_time(motion_ids)
        elif (self._state_init == HumanoidAMP.StateInit.Start):
            motion_times = torch.zeros(num_envs, device=self.device)
        else:
            assert(False), "Unsupported state initialization strategy: {:s}".format(str(self._state_init))

        root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \
            = self._motion_lib.get_motion_state(motion_ids, motion_times)

        self._set_env_state(env_ids=env_ids,
                            root_pos=root_pos,
                            root_rot=root_rot,
                            dof_pos=dof_pos,
                            root_vel=root_vel,
                            root_ang_vel=root_ang_vel,
                            dof_vel=dof_vel)

        self._reset_ref_env_ids = env_ids
        self._reset_ref_motion_ids = motion_ids
        self._reset_ref_motion_times = motion_times
        return

    def _reset_hybrid_state_init(self, env_ids):
        """Per env, choose reference init with prob `hybridInitProb`, else default."""
        num_envs = env_ids.shape[0]
        ref_probs = to_torch(np.array([self._hybrid_init_prob] * num_envs), device=self.device)
        ref_init_mask = torch.bernoulli(ref_probs) == 1.0

        ref_reset_ids = env_ids[ref_init_mask]
        if (len(ref_reset_ids) > 0):
            self._reset_ref_state_init(ref_reset_ids)

        default_reset_ids = env_ids[torch.logical_not(ref_init_mask)]
        if (len(default_reset_ids) > 0):
            self._reset_default(default_reset_ids)

        return

    def _init_amp_obs(self, env_ids):
        """Fill the AMP history of freshly reset envs so the window is valid."""
        self._compute_amp_observations(env_ids)

        if (len(self._reset_default_env_ids) > 0):
            self._init_amp_obs_default(self._reset_default_env_ids)

        if (len(self._reset_ref_env_ids) > 0):
            self._init_amp_obs_ref(self._reset_ref_env_ids, self._reset_ref_motion_ids,
                                   self._reset_ref_motion_times)

        return

    def _init_amp_obs_default(self, env_ids):
        # No real history after a default reset: replicate the current frame.
        curr_amp_obs = self._curr_amp_obs_buf[env_ids].unsqueeze(-2)
        self._hist_amp_obs_buf[env_ids] = curr_amp_obs
        return

    def _init_amp_obs_ref(self, env_ids, motion_ids, motion_times):
        """Back-fill AMP history from the reference motion the env was reset to."""
        dt = self.dt
        # BUGFIX: without unsqueeze(-1), tile produced a (1, n*k) sequence-repeat
        # ordering that misaligned motion ids with the env-major flattened
        # motion_times below; unsqueeze matches build_amp_obs_demo.
        motion_ids = torch.tile(motion_ids.unsqueeze(-1), [1, self._num_amp_obs_steps - 1])
        motion_times = motion_times.unsqueeze(-1)
        # History offsets -dt, -2dt, ... (slot 0 / offset 0 is the live frame).
        time_steps = -dt * (torch.arange(0, self._num_amp_obs_steps - 1, device=self.device) + 1)
        motion_times = motion_times + time_steps

        motion_ids = motion_ids.view(-1)
        motion_times = motion_times.view(-1)
        root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \
            = self._motion_lib.get_motion_state(motion_ids, motion_times)
        amp_obs_demo = build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel,
                                              dof_pos, dof_vel, key_pos,
                                              self._local_root_obs, self._root_height_obs,
                                              self._dof_obs_size, self._dof_offsets)
        self._hist_amp_obs_buf[env_ids] = amp_obs_demo.view(self._hist_amp_obs_buf[env_ids].shape)
        return

    def _set_env_state(self, env_ids, root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel):
        # Root state layout: [0:3] pos, [3:7] rot quat, [7:10] lin vel, [10:13] ang vel.
        self._humanoid_root_states[env_ids, 0:3] = root_pos
        self._humanoid_root_states[env_ids, 3:7] = root_rot
        self._humanoid_root_states[env_ids, 7:10] = root_vel
        self._humanoid_root_states[env_ids, 10:13] = root_ang_vel

        self._dof_pos[env_ids] = dof_pos
        self._dof_vel[env_ids] = dof_vel
        return

    def _update_hist_amp_obs(self, env_ids=None):
        # Shift the window: history slots 1.. receive the previous slots 0..n-2.
        if (env_ids is None):
            self._hist_amp_obs_buf[:] = self._amp_obs_buf[:, 0:(self._num_amp_obs_steps - 1)]
        else:
            self._hist_amp_obs_buf[env_ids] = self._amp_obs_buf[env_ids, 0:(self._num_amp_obs_steps - 1)]
        return

    def _compute_amp_observations(self, env_ids=None):
        """Write the current-frame AMP features (slot 0) from simulator state."""
        key_body_pos = self._rigid_body_pos[:, self._key_body_ids, :]
        if (env_ids is None):
            self._curr_amp_obs_buf[:] = build_amp_observations(self._rigid_body_pos[:, 0, :],
                                                               self._rigid_body_rot[:, 0, :],
                                                               self._rigid_body_vel[:, 0, :],
                                                               self._rigid_body_ang_vel[:, 0, :],
                                                               self._dof_pos, self._dof_vel, key_body_pos,
                                                               self._local_root_obs, self._root_height_obs,
                                                               self._dof_obs_size, self._dof_offsets)
        else:
            self._curr_amp_obs_buf[env_ids] = build_amp_observations(self._rigid_body_pos[env_ids][:, 0, :],
                                                                     self._rigid_body_rot[env_ids][:, 0, :],
                                                                     self._rigid_body_vel[env_ids][:, 0, :],
                                                                     self._rigid_body_ang_vel[env_ids][:, 0, :],
                                                                     self._dof_pos[env_ids],
                                                                     self._dof_vel[env_ids],
                                                                     key_body_pos[env_ids],
                                                                     self._local_root_obs,
                                                                     self._root_height_obs,
                                                                     self._dof_obs_size, self._dof_offsets)
        return


#####################################################################
###=========================jit functions=========================###
#####################################################################

@torch.jit.script
def build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos,
                           local_root_obs, root_height_obs, dof_obs_size, dof_offsets):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, bool, bool, int, List[int]) -> Tensor
    """Assemble one AMP feature frame, expressed in the root's heading frame."""
    root_h = root_pos[:, 2:3]
    heading_rot = torch_utils.calc_heading_quat_inv(root_rot)

    if (local_root_obs):
        root_rot_obs = quat_mul(heading_rot, root_rot)
    else:
        root_rot_obs = root_rot
    # 6D tangent-normal rotation encoding avoids quaternion double-cover.
    root_rot_obs = torch_utils.quat_to_tan_norm(root_rot_obs)

    if (not root_height_obs):
        root_h_obs = torch.zeros_like(root_h)
    else:
        root_h_obs = root_h

    local_root_vel = quat_rotate(heading_rot, root_vel)
    local_root_ang_vel = quat_rotate(heading_rot, root_ang_vel)

    # Key-body positions relative to the root, rotated into the heading frame.
    root_pos_expand = root_pos.unsqueeze(-2)
    local_key_body_pos = key_body_pos - root_pos_expand

    heading_rot_expand = heading_rot.unsqueeze(-2)
    heading_rot_expand = heading_rot_expand.repeat((1, local_key_body_pos.shape[1], 1))
    flat_end_pos = local_key_body_pos.view(local_key_body_pos.shape[0] * local_key_body_pos.shape[1],
                                           local_key_body_pos.shape[2])
    flat_heading_rot = heading_rot_expand.view(heading_rot_expand.shape[0] * heading_rot_expand.shape[1],
                                               heading_rot_expand.shape[2])
    local_end_pos = quat_rotate(flat_heading_rot, flat_end_pos)
    flat_local_key_pos = local_end_pos.view(local_key_body_pos.shape[0],
                                            local_key_body_pos.shape[1] * local_key_body_pos.shape[2])

    dof_obs = dof_to_obs(dof_pos, dof_obs_size, dof_offsets)

    obs = torch.cat((root_h_obs, root_rot_obs, local_root_vel, local_root_ang_vel, dof_obs, dof_vel,
                     flat_local_key_pos), dim=-1)
    return obs
class HumanoidAMPTask(humanoid_amp.HumanoidAMP):
    """AMP humanoid with a pluggable high-level task.

    Subclasses supply the task by overriding the `_*_task` hooks and
    `_compute_task_obs` / `_compute_reward`; this base wires those hooks into
    the env lifecycle and optionally appends task features to the observation.
    """

    def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
        self._enable_task_obs = cfg["env"]["enableTaskObs"]

        super().__init__(cfg=cfg,
                         sim_params=sim_params,
                         physics_engine=physics_engine,
                         device_type=device_type,
                         device_id=device_id,
                         headless=headless)

    def get_obs_size(self):
        """Humanoid observation size, plus task features when enabled."""
        size = super().get_obs_size()
        if self._enable_task_obs:
            size += self.get_task_obs_size()
        return size

    def get_task_obs_size(self):
        """Number of task-specific observation features (0 unless overridden)."""
        return 0

    def pre_physics_step(self, actions):
        super().pre_physics_step(actions)
        self._update_task()

    def render(self, sync_frame_time=False):
        super().render(sync_frame_time)
        if self.viewer:
            self._draw_task()

    def _update_task(self):
        """Hook: advance task state each step. No-op by default."""

    def _reset_envs(self, env_ids):
        super()._reset_envs(env_ids)
        self._reset_task(env_ids)

    def _reset_task(self, env_ids):
        """Hook: re-randomize task state for reset envs. No-op by default."""

    def _compute_observations(self):
        """Fill obs_buf: agent obs (plus task obs if enabled), then opponent obs."""
        own_obs, opponent_obs = self._compute_humanoid_obs()

        if self._enable_task_obs:
            task_obs = self._compute_task_obs(env_ids=None)
            # Task features are appended only to the controlled agent's obs.
            own_obs = torch.cat([own_obs, task_obs], dim=-1)

        self.obs_buf[:self.num_envs] = own_obs
        self.obs_buf[self.num_envs:] = opponent_obs

    def _compute_task_obs(self, env_ids=None):
        """Hook: build the task observation. Must be overridden by tasks."""
        return NotImplemented

    def _compute_reward(self, actions):
        """Hook: compute the task reward. Must be overridden by tasks."""
        return NotImplemented

    def _draw_task(self):
        """Hook: viewer-only task debug drawing. No-op by default."""
- `SkeletonMotion` describes a time-series of skeleton states and provides utilities for computing joint velocities. ## poselib.visualization - `poselib.visualization.common`: Functions used for visualizing skeletons interactively in `matplotlib`. - In SkeletonState visualization, use key `q` to quit window. - In interactive SkeletonMotion visualization, you can use the following key commands: - `w` - loop animation - `x` - play/pause animation - `z` - previous frame - `c` - next frame - `n` - quit window ## Key Features Poselib provides several key features for working with animation data. We list some of the frequently used ones here, and provide instructions and examples on their usage. ### Importing from FBX Poselib supports importing skeletal animation sequences from .fbx format into a SkeletonMotion representation. To use this functionality, you will need to first set up the Python FBX SDK on your machine using the following instructions. This package is necessary to read data from fbx files, which is a proprietary file format owned by Autodesk. The latest FBX SDK tested was FBX SDK 2020.2.1 for Python 3.7, which can be found on the Autodesk website: https://www.autodesk.com/developer-network/platform-technologies/fbx-sdk-2020-2-1. Follow the instructions at https://help.autodesk.com/view/FBX/2020/ENU/?guid=FBX_Developer_Help_scripting_with_python_fbx_installing_python_fbx_html for download, install, and copy/paste instructions for the FBX Python SDK. This repo provides an example script `fbx_importer.py` that shows usage of importing a .fbx file. Note that `SkeletonMotion.from_fbx()` takes in an optional parameter `root_joint`, which can be used to specify a joint in the skeleton tree as the root joint. If `root_joint` is not specified, we will default to using the first node in the FBX scene that contains animation data. ### Importing from MJCF MJCF is a robotics file format supported by Isaac Gym. 
For convenience, we provide an API for importing MJCF assets into SkeletonTree definitions to represent the skeleton topology. An example script `mjcf_importer.py` is provided to show usage of this. This can be helpful if motion sequences need to be retargeted to your simulation skeleton that's been created in MJCF format. Importing the file to SkeletonTree format will allow you to generate T-poses or other retargeting poses that can be used for retargeting. We also show an example of creating a T-Pose for our AMP Humanoid asset in `generate_amp_humanoid_tpose.py`. ### Retargeting Motions Retargeting motions is important when your source data uses skeletons that have different morphologies than your target skeletons. We provide APIs for performing retarget of motion sequences in our SkeletonState and SkeletonMotion classes. To use the retargeting API, users must provide the following information: - source_motion: a SkeletonMotion npy representation of a motion sequence. The motion clip should use the same skeleton as the source T-Pose skeleton. - target_motion_path: path to save the retargeted motion to - source_tpose: a SkeletonState npy representation of the source skeleton in it's T-Pose state - target_tpose: a SkeletonState npy representation of the target skeleton in it's T-Pose state (pose should match source T-Pose) - joint_mapping: mapping of joint names from source to target - rotation: root rotation offset from source to target skeleton (for transforming across different orientation axes), represented as a quaternion in XYZW order. - scale: scale offset from source to target skeleton We provide an example script `retarget_motion.py` to demonstrate usage of the retargeting API for the CMU Motion Capture Database. Note that the retargeting data for this script is stored in `data/configs/retarget_cmu_to_amp.json`. Additionally, a SkeletonState T-Pose file and retargeting config file are also provided for the SFU Motion Capture Database. 
These can be found at `data/sfu_tpose.npy` and `data/configs/retarget_sfu_to_amp.json`. ### Documentation We provide a description of the functions and classes available in poselib in the comments of the APIs. Please check them out for more details. ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/data/configs/retarget_cmu_to_amp.json ================================================ { "source_motion": "data/01_01_cmu.npy", "target_motion_path": "data/01_01_cmu_amp.npy", "source_tpose": "data/cmu_tpose.npy", "target_tpose": "data/amp_humanoid_tpose.npy", "joint_mapping": { "Hips": "pelvis", "LeftUpLeg": "left_thigh", "LeftLeg": "left_shin", "LeftFoot": "left_foot", "RightUpLeg": "right_thigh", "RightLeg": "right_shin", "RightFoot": "right_foot", "Spine1": "torso", "Head": "head", "LeftArm": "left_upper_arm", "LeftForeArm": "left_lower_arm", "LeftHand": "left_hand", "RightArm": "right_upper_arm", "RightForeArm": "right_lower_arm", "RightHand": "right_hand" }, "rotation": [0, 0, 0.7071068, 0.7071068], "scale": 0.056444, "root_height_offset": 0.05, "trim_frame_beg": 75, "trim_frame_end": 372 } ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/data/configs/retarget_sfu_to_amp.json ================================================ { "source_motion": "data/0005_Jogging001.npy", "target_motion_path": "data/0005_Jogging001_amp.npy", "source_tpose": "data/sfu_tpose.npy", "target_tpose": "data/amp_humanoid_tpose.npy", "joint_mapping": { "Hips": "pelvis", "LeftUpLeg": "left_thigh", "LeftLeg": "left_shin", "LeftFoot": "left_foot", "RightUpLeg": "right_thigh", "RightLeg": "right_shin", "RightFoot": "right_foot", "Spine1": "torso", "Head": "head", "LeftArm": "left_upper_arm", "LeftForeArm": "left_lower_arm", "LeftHand": "left_hand", "RightArm": "right_upper_arm", "RightForeArm": "right_lower_arm", "RightHand": "right_hand" }, "rotation": [0.5, 0.5, 0.5, 0.5], "scale": 0.01, 
"root_height_offset": 0.0, "trim_frame_beg": 0, "trim_frame_end": 100 } ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/fbx_importer.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Example script: import a CMU mocap clip from FBX into a SkeletonMotion.

Requires the Autodesk Python FBX SDK (see the poselib README for setup).
Saves the imported motion as .npy, then opens an interactive viewer.
"""
import os
import json

from poselib.skeleton.skeleton3d import SkeletonTree, SkeletonState, SkeletonMotion
from poselib.visualization.common import plot_skeleton_state, plot_skeleton_motion_interactive

# source fbx file path
fbx_file = "data/01_01_cmu.fbx"

# import fbx file - make sure to provide a valid joint name for root_joint
motion = SkeletonMotion.from_fbx(
    fbx_file_path=fbx_file,
    root_joint="Hips",
    fps=60
)

# save motion in npy format
motion.to_file("data/01_01_cmu.npy")

# visualize motion (interactive matplotlib window; blocks until closed)
plot_skeleton_motion_interactive(motion)
import torch

from poselib.core.rotation3d import *
from poselib.skeleton.skeleton3d import SkeletonTree, SkeletonState
from poselib.visualization.common import plot_skeleton_state

"""
This scripts imports a MJCF XML file and converts the skeleton into a SkeletonTree format.
It then generates a zero rotation pose, and adjusts the pose into a T-Pose.
"""

# import MJCF file
xml_path = "../../../../assets/mjcf/amp_humanoid.xml"
skeleton = SkeletonTree.from_mjcf(xml_path)

# generate zero rotation pose
zero_pose = SkeletonState.zero_pose(skeleton)

# adjust pose into a T Pose
# NOTE(review): the indexed assignments below appear to edit zero_pose in
# place via the tensor returned by `local_rotation` — this assumes the
# property returns the underlying tensor, not a copy; confirm in SkeletonState.
local_rotation = zero_pose.local_rotation
# rotate the left arm 90 degrees about the x axis to raise it into the T pose
local_rotation[skeleton.index("left_upper_arm")] = quat_mul(
    quat_from_angle_axis(angle=torch.tensor([90.0]), axis=torch.tensor([1.0, 0.0, 0.0]), degree=True),
    local_rotation[skeleton.index("left_upper_arm")]
)
# mirror rotation (-90 degrees) for the right arm
local_rotation[skeleton.index("right_upper_arm")] = quat_mul(
    quat_from_angle_axis(angle=torch.tensor([-90.0]), axis=torch.tensor([1.0, 0.0, 0.0]), degree=True),
    local_rotation[skeleton.index("right_upper_arm")]
)

translation = zero_pose.root_translation
# in-place += mutates the pose's translation tensor, lifting the root off the ground
translation += torch.tensor([0, 0, 0.9])

# save and visualize T-pose
zero_pose.to_file("data/amp_humanoid_tpose.npy")
plot_skeleton_state(zero_pose)
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Example script: import an MJCF skeleton and save its zero-rotation pose."""
from poselib.skeleton.skeleton3d import SkeletonTree, SkeletonState
from poselib.visualization.common import plot_skeleton_state

# load in XML mjcf file and save zero rotation pose in npy format
xml_path = "../../../../assets/mjcf/nv_humanoid.xml"
skeleton = SkeletonTree.from_mjcf(xml_path)
zero_pose = SkeletonState.zero_pose(skeleton)
zero_pose.to_file("data/nv_humanoid.npy")

# visualize zero rotation pose (blocks until the matplotlib window is closed)
plot_skeleton_state(zero_pose)
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. __version__ = "0.0.1" from .core import * ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/__init__.py ================================================ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. from .tensor_utils import * from .rotation3d import * from .backend import Serializable, logger ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/backend/__init__.py ================================================ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. 
from .abstract import Serializable from .logger import logger ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/backend/abstract.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from abc import ABCMeta, abstractmethod, abstractclassmethod from collections import OrderedDict import json import numpy as np import os TENSOR_CLASS = {} def register(name): global TENSOR_CLASS def core(tensor_cls): TENSOR_CLASS[name] = tensor_cls return tensor_cls return core def _get_cls(name): global TENSOR_CLASS return TENSOR_CLASS[name] class NumpyEncoder(json.JSONEncoder): """ Special json encoder for numpy types """ def default(self, obj): if isinstance( obj, ( np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, ), ): return int(obj) elif isinstance(obj, (np.float_, np.float16, np.float32, np.float64)): return float(obj) elif isinstance(obj, (np.ndarray,)): return dict(__ndarray__=obj.tolist(), dtype=str(obj.dtype), shape=obj.shape) return json.JSONEncoder.default(self, obj) def json_numpy_obj_hook(dct): if isinstance(dct, dict) and "__ndarray__" in dct: data = np.asarray(dct["__ndarray__"], dtype=dct["dtype"]) return data.reshape(dct["shape"]) return dct class Serializable: """ Implementation to read/write to file. 
All class the is inherited from this class needs to implement to_dict() and from_dict() """ @abstractclassmethod def from_dict(cls, dict_repr, *args, **kwargs): """ Read the object from an ordered dictionary :param dict_repr: the ordered dictionary that is used to construct the object :type dict_repr: OrderedDict :param args, kwargs: the arguments that need to be passed into from_dict() :type args, kwargs: additional arguments """ pass @abstractmethod def to_dict(self): """ Construct an ordered dictionary from the object :rtype: OrderedDict """ pass @classmethod def from_file(cls, path, *args, **kwargs): """ Read the object from a file (either .npy or .json) :param path: path of the file :type path: string :param args, kwargs: the arguments that need to be passed into from_dict() :type args, kwargs: additional arguments """ if path.endswith(".json"): with open(path, "r") as f: d = json.load(f, object_hook=json_numpy_obj_hook) elif path.endswith(".npy"): d = np.load(path, allow_pickle=True).item() else: assert False, "failed to load {} from {}".format(cls.__name__, path) assert d["__name__"] == cls.__name__, "the file belongs to {}, not {}".format( d["__name__"], cls.__name__ ) return cls.from_dict(d, *args, **kwargs) def to_file(self, path: str) -> None: """ Write the object to a file (either .npy or .json) :param path: path of the file :type path: string """ if os.path.dirname(path) != "" and not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) d = self.to_dict() d["__name__"] = self.__class__.__name__ if path.endswith(".json"): with open(path, "w") as f: json.dump(d, f, cls=NumpyEncoder, indent=4) elif path.endswith(".npy"): np.save(path, d) ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/backend/logger.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import logging

# Package-wide logger, shared by all poselib modules.
logger = logging.getLogger("poselib")
logger.setLevel(logging.INFO)

# Attach a stream handler only if none exists yet, so repeated imports or
# module reloads do not duplicate log output.  (Idiomatic truthiness check
# instead of `if not len(...)`.)
if not logger.handlers:
    formatter = logging.Formatter(
        fmt="%(asctime)-15s - %(levelname)s - %(module)s - %(message)s"
    )
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)

logger.info("logger initialized")

# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/rotation3d.py
# ================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from typing import List, Optional
import math
import torch

# Quaternions throughout this module use xyzw layout: q[..., :3] is the
# imaginary part and q[..., 3] is the real part.


@torch.jit.script
def quat_mul(a, b):
    """
    quaternion multiplication (Hamilton product a*b, xyzw layout)
    """
    x1, y1, z1, w1 = a[..., 0], a[..., 1], a[..., 2], a[..., 3]
    x2, y2, z2, w2 = b[..., 0], b[..., 1], b[..., 2], b[..., 3]

    w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
    x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
    y = w1 * y2 + y1 * w2 + z1 * x2 - x1 * z2
    z = w1 * z2 + z1 * w2 + x1 * y2 - y1 * x2

    return torch.stack([x, y, z, w], dim=-1)


@torch.jit.script
def quat_pos(x):
    """
    make all the real part of the quaternion positive
    (q and -q represent the same rotation)
    """
    q = x
    z = (q[..., 3:] < 0).float()
    q = (1 - 2 * z) * q
    return q


@torch.jit.script
def quat_abs(x):
    """
    quaternion norm (unit quaternion represents a 3D rotation, which has norm of 1)
    """
    x = x.norm(p=2, dim=-1)
    return x


@torch.jit.script
def quat_unit(x):
    """
    normalized quaternion with norm of 1
    """
    norm = quat_abs(x).unsqueeze(-1)
    # clamp avoids division by zero for degenerate inputs
    return x / (norm.clamp(min=1e-9))


@torch.jit.script
def quat_conjugate(x):
    """
    quaternion with its imaginary part negated
    """
    return torch.cat([-x[..., :3], x[..., 3:]], dim=-1)


@torch.jit.script
def quat_real(x):
    """
    real component of the quaternion
    """
    return x[..., 3]


@torch.jit.script
def quat_imaginary(x):
    """
    imaginary components of the quaternion
    """
    return x[..., :3]


@torch.jit.script
def quat_norm_check(x):
    """
    verify that a quaternion has norm 1 and a non-negative real part
    """
    # message typo fixed: was "the quaternion is has non-1 norm"
    assert bool(
        (abs(x.norm(p=2, dim=-1) - 1) < 1e-3).all()
    ), "the quaternion has non-1 norm: {}".format(abs(x.norm(p=2, dim=-1) - 1))
    assert bool((x[..., 3] >= 0).all()), "the quaternion has negative real part"


@torch.jit.script
def quat_normalize(q):
    """
    Construct 3D rotation from quaternion (the quaternion needs not to be normalized).
    """
    q = quat_unit(quat_pos(q))  # normalized to positive and unit quaternion
    return q


@torch.jit.script
def quat_from_xyz(xyz):
    """
    Construct a unit quaternion from its imaginary (xyz) component.

    The real part is recovered as w = sqrt(1 - |xyz|^2), so the input must
    have norm <= 1.

    BUGFIX: the previous implementation used `xyz.norm()` (which collapsed
    all batch dimensions) and computed w = 1 - |xyz| instead of
    sqrt(1 - |xyz|^2), so the result was not a unit quaternion.
    """
    norm_sq = (xyz * xyz).sum(dim=-1, keepdim=True)
    assert bool((norm_sq <= 1.0 + 1e-6).all()), "xyz has its norm greater than 1"
    w = (1.0 - norm_sq).clamp(min=0.0).sqrt()
    return torch.cat([xyz, w], dim=-1)


@torch.jit.script
def quat_identity(shape: List[int]):
    """
    Construct 3D identity rotation given shape
    """
    w = torch.ones(shape + [1])
    xyz = torch.zeros(shape + [3])
    q = torch.cat([xyz, w], dim=-1)
    return quat_normalize(q)


@torch.jit.script
def quat_from_angle_axis(angle, axis, degree: bool = False):
    """ Create a 3D rotation from angle and axis of rotation. The rotation is counter-clockwise
    along the axis.

    The rotation can be interpreted as a_R_b where frame "b" is the new frame that
    gets rotated counter-clockwise along the axis from frame "a"

    :param angle: angle of rotation
    :type angle: Tensor
    :param axis: axis of rotation
    :type axis: Tensor
    :param degree: put True here if the angle is given by degree
    :type degree: bool, optional, default=False
    """
    if degree:
        angle = angle / 180.0 * math.pi
    theta = (angle / 2).unsqueeze(-1)
    axis = axis / (axis.norm(p=2, dim=-1, keepdim=True).clamp(min=1e-9))
    xyz = axis * theta.sin()
    w = theta.cos()
    return quat_normalize(torch.cat([xyz, w], dim=-1))


@torch.jit.script
def quat_from_rotation_matrix(m):
    """
    Construct a 3D rotation from a valid 3x3 rotation matrices.
    Reference can be found here:
    http://www.cg.info.hiroshima-cu.ac.jp/~miyazaki/knowledge/teche52.html

    :param m: 3x3 orthogonal rotation matrices.
    :type m: Tensor

    :rtype: Tensor
    """
    m = m.unsqueeze(0)
    diag0 = m[..., 0, 0]
    diag1 = m[..., 1, 1]
    diag2 = m[..., 2, 2]

    # Math stuff.
    w = (((diag0 + diag1 + diag2 + 1.0) / 4.0).clamp(0.0, None)) ** 0.5
    x = (((diag0 - diag1 - diag2 + 1.0) / 4.0).clamp(0.0, None)) ** 0.5
    y = (((-diag0 + diag1 - diag2 + 1.0) / 4.0).clamp(0.0, None)) ** 0.5
    z = (((-diag0 - diag1 + diag2 + 1.0) / 4.0).clamp(0.0, None)) ** 0.5

    # Only modify quaternions where w > x, y, z.
    c0 = (w >= x) & (w >= y) & (w >= z)
    x[c0] *= (m[..., 2, 1][c0] - m[..., 1, 2][c0]).sign()
    y[c0] *= (m[..., 0, 2][c0] - m[..., 2, 0][c0]).sign()
    z[c0] *= (m[..., 1, 0][c0] - m[..., 0, 1][c0]).sign()

    # Only modify quaternions where x > w, y, z
    c1 = (x >= w) & (x >= y) & (x >= z)
    w[c1] *= (m[..., 2, 1][c1] - m[..., 1, 2][c1]).sign()
    y[c1] *= (m[..., 1, 0][c1] + m[..., 0, 1][c1]).sign()
    z[c1] *= (m[..., 0, 2][c1] + m[..., 2, 0][c1]).sign()

    # Only modify quaternions where y > w, x, z.
    c2 = (y >= w) & (y >= x) & (y >= z)
    w[c2] *= (m[..., 0, 2][c2] - m[..., 2, 0][c2]).sign()
    x[c2] *= (m[..., 1, 0][c2] + m[..., 0, 1][c2]).sign()
    z[c2] *= (m[..., 2, 1][c2] + m[..., 1, 2][c2]).sign()

    # Only modify quaternions where z > w, x, y.
    c3 = (z >= w) & (z >= x) & (z >= y)
    w[c3] *= (m[..., 1, 0][c3] - m[..., 0, 1][c3]).sign()
    x[c3] *= (m[..., 2, 0][c3] + m[..., 0, 2][c3]).sign()
    y[c3] *= (m[..., 2, 1][c3] + m[..., 1, 2][c3]).sign()

    return quat_normalize(torch.stack([x, y, z, w], dim=-1)).squeeze(0)


@torch.jit.script
def quat_mul_norm(x, y):
    """
    Combine two sets of 3D rotations together (quaternion product followed
    by normalization). The shapes need to be broadcastable.
    """
    return quat_normalize(quat_mul(x, y))


@torch.jit.script
def quat_rotate(rot, vec):
    """
    Rotate a 3D vector with the 3D rotation
    """
    other_q = torch.cat([vec, torch.zeros_like(vec[..., :1])], dim=-1)
    return quat_imaginary(quat_mul(quat_mul(rot, other_q), quat_conjugate(rot)))


@torch.jit.script
def quat_inverse(x):
    """
    The inverse of the rotation
    """
    return quat_conjugate(x)


@torch.jit.script
def quat_identity_like(x):
    """
    Construct identity 3D rotation with the same shape
    """
    return quat_identity(x.shape[:-1])


@torch.jit.script
def quat_angle_axis(x):
    """
    The (angle, axis) representation of the rotation. The axis is normalized to unit length.
    The angle is guaranteed to be between [0, pi].
    """
    s = 2 * (x[..., 3] ** 2) - 1
    angle = s.clamp(-1, 1).arccos()  # just to be safe
    axis = x[..., :3]
    # BUGFIX: normalize out-of-place; the previous in-place `/=` silently
    # mutated the caller's quaternion through the view returned by slicing.
    axis = axis / axis.norm(p=2, dim=-1, keepdim=True).clamp(min=1e-9)
    return angle, axis


@torch.jit.script
def quat_yaw_rotation(x, z_up: bool = True):
    """
    Yaw rotation (rotation along z-axis)
    """
    q = x
    if z_up:
        q = torch.cat([torch.zeros_like(q[..., 0:2]), q[..., 2:3], q[..., 3:]], dim=-1)
    else:
        q = torch.cat(
            [
                torch.zeros_like(q[..., 0:1]),
                q[..., 1:2],
                torch.zeros_like(q[..., 2:3]),
                q[..., 3:4],
            ],
            dim=-1,
        )
    return quat_normalize(q)


@torch.jit.script
def transform_from_rotation_translation(
    r: Optional[torch.Tensor] = None, t: Optional[torch.Tensor] = None
):
    """
    Construct a transform from a quaternion and 3D translation. Only one of them can be None.
    A transform is stored as [qx, qy, qz, qw, tx, ty, tz] along the last dim.
    """
    assert r is not None or t is not None, "rotation and translation can't be all None"
    if r is None:
        assert t is not None
        r = quat_identity(list(t.shape))
    if t is None:
        t = torch.zeros(list(r.shape) + [3])
    return torch.cat([r, t], dim=-1)


@torch.jit.script
def transform_identity(shape: List[int]):
    """
    Identity transformation with given shape
    """
    r = quat_identity(shape)
    t = torch.zeros(shape + [3])
    return transform_from_rotation_translation(r, t)


@torch.jit.script
def transform_rotation(x):
    """Get rotation from transform"""
    return x[..., :4]


@torch.jit.script
def transform_translation(x):
    """Get translation from transform"""
    return x[..., 4:]


@torch.jit.script
def transform_inverse(x):
    """
    Inverse transformation
    """
    inv_so3 = quat_inverse(transform_rotation(x))
    return transform_from_rotation_translation(
        r=inv_so3, t=quat_rotate(inv_so3, -transform_translation(x))
    )


@torch.jit.script
def transform_identity_like(x):
    """
    identity transformation with the same shape
    """
    return transform_identity(x.shape)


@torch.jit.script
def transform_mul(x, y):
    """
    Combine two transformation together
    """
    z = transform_from_rotation_translation(
        r=quat_mul_norm(transform_rotation(x), transform_rotation(y)),
        t=quat_rotate(transform_rotation(x), transform_translation(y))
        + transform_translation(x),
    )
    return z


@torch.jit.script
def transform_apply(rot, vec):
    """
    Transform a 3D vector
    :param rot: transform
    :type rot: Tensor
    :param vec: 3D vector
    :type vec: Tensor
    """
    assert isinstance(vec, torch.Tensor)
    return quat_rotate(transform_rotation(rot), vec) + transform_translation(rot)


@torch.jit.script
def rot_matrix_det(x):
    """
    Return the determinant of the 3x3 matrix. The shape of the tensor will be as same as the
    shape of the matrix
    """
    a, b, c = x[..., 0, 0], x[..., 0, 1], x[..., 0, 2]
    d, e, f = x[..., 1, 0], x[..., 1, 1], x[..., 1, 2]
    g, h, i = x[..., 2, 0], x[..., 2, 1], x[..., 2, 2]
    t1 = a * (e * i - f * h)
    t2 = b * (d * i - f * g)
    t3 = c * (d * h - e * g)
    return t1 - t2 + t3


@torch.jit.script
def rot_matrix_integrity_check(x):
    """
    Verify that a rotation matrix has a determinant of one and is orthogonal

    BUGFIX: the previous implementation called `x.permute(torch.arange(...))`
    (permute takes ints, not a tensor) and `rtr.zeros_like()` (not a Tensor
    method), and compared the signed difference instead of its magnitude.
    """
    det = rot_matrix_det(x)
    assert bool((abs(det - 1) < 1e-3).all()), "the matrix has non-one determinant"
    # R @ R^T must equal the identity for an orthogonal matrix.
    rtr = x @ x.transpose(-1, -2)
    rtr_gt = torch.zeros_like(rtr)
    rtr_gt[..., 0, 0] = 1
    rtr_gt[..., 1, 1] = 1
    rtr_gt[..., 2, 2] = 1
    assert bool(((rtr - rtr_gt).abs() < 1e-3).all()), "the matrix is not orthogonal"


@torch.jit.script
def rot_matrix_from_quaternion(q):
    """
    Construct rotation matrix from quaternion
    """
    # Shortcuts for individual elements (using wikipedia's convention)
    qi, qj, qk, qr = q[..., 0], q[..., 1], q[..., 2], q[..., 3]

    # Set individual elements
    R00 = 1.0 - 2.0 * (qj ** 2 + qk ** 2)
    R01 = 2 * (qi * qj - qk * qr)
    R02 = 2 * (qi * qk + qj * qr)
    R10 = 2 * (qi * qj + qk * qr)
    R11 = 1.0 - 2.0 * (qi ** 2 + qk ** 2)
    R12 = 2 * (qj * qk - qi * qr)
    R20 = 2 * (qi * qk - qj * qr)
    R21 = 2 * (qj * qk + qi * qr)
    R22 = 1.0 - 2.0 * (qi ** 2 + qj ** 2)

    R0 = torch.stack([R00, R01, R02], dim=-1)
    R1 = torch.stack([R10, R11, R12], dim=-1)
    # BUGFIX: the last row previously reused R10 instead of R20, producing a
    # non-orthogonal matrix for most rotations.
    R2 = torch.stack([R20, R21, R22], dim=-1)

    R = torch.stack([R0, R1, R2], dim=-2)
    return R


@torch.jit.script
def euclidean_to_rotation_matrix(x):
    """
    Get the rotation matrix on the top-left corner of a Euclidean transformation matrix
    """
    return x[..., :3, :3]


@torch.jit.script
def euclidean_integrity_check(x):
    euclidean_to_rotation_matrix(x)  # check 3d-rotation matrix
    assert bool((x[..., 3, :3] == 0).all()), "the last row is illegal"
    assert bool((x[..., 3, 3] == 1).all()), "the last row is illegal"


@torch.jit.script
def euclidean_translation(x):
    """
    Get the translation vector located at the last column of the matrix
    """
    return x[..., :3, 3]


@torch.jit.script
def euclidean_inverse(x):
    """
    Compute the 4x4 matrix that represents the inverse Euclidean transform.

    BUGFIX: the previous implementation called the nonexistent method
    `x.zeros_like()`, assigned a 4-element quaternion into the 3x3 rotation
    slot, indexed column 4 of a 4x4 matrix, and never set the homogeneous 1
    in the bottom-right corner.
    """
    inv = torch.zeros_like(x)
    irot = quat_inverse(quat_from_rotation_matrix(euclidean_to_rotation_matrix(x)))
    inv[..., :3, :3] = rot_matrix_from_quaternion(irot)
    inv[..., :3, 3] = quat_rotate(irot, -euclidean_translation(x))
    inv[..., 3, 3] = 1.0
    return inv


@torch.jit.script
def euclidean_to_transform(transformation_matrix):
    """
    Construct a transform from a Euclidean transformation matrix
    """
    return transform_from_rotation_translation(
        r=quat_from_rotation_matrix(
            m=euclidean_to_rotation_matrix(transformation_matrix)
        ),
        t=euclidean_translation(transformation_matrix),
    )

# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/tensor_utils.py
# ================================================
# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
from collections import OrderedDict
from .backend import Serializable
import torch


class TensorUtils(Serializable):
    """Serialization shim that rebuilds torch tensors from their dict form."""

    @classmethod
    def from_dict(cls, dict_repr, *args, **kwargs):
        """Rebuild a torch tensor from its dictionary representation.

        :param dict_repr: the ordered dictionary that is used to construct the object
        :type dict_repr: OrderedDict
        :param kwargs: the arguments that need to be passed into from_dict()
        :type kwargs: additional arguments
        """
        raw = dict_repr["arr"]
        target_dtype = dict_repr["context"]["dtype"]
        return torch.from_numpy(raw.astype(target_dtype))

    def to_dict(self):
        """Serialization to a dictionary is not supported on this class.

        :rtype: OrderedDict
        """
        return NotImplemented


def tensor_to_dict(x):
    """Convert a torch tensor into the dict form understood by from_dict().

    :rtype: OrderedDict
    """
    arr = x.numpy()
    return {"arr": arr, "context": {"dtype": arr.dtype.name}}

# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/tests/__init__.py
# ================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/core/tests/test_rotation.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Manual smoke-test script for poselib.core.rotation3d: prints intermediate
# values and asserts a few invariants.  Not a pytest module; run directly.

from ..rotation3d import *
import numpy as np
import torch

# Normalize a batch of raw quaternions and rotate two basis-ish vectors.
q = torch.from_numpy(np.array([[0, 1, 2, 3], [-2, 3, -1, 5]], dtype=np.float32))
print("q", q)
r = quat_normalize(q)
x = torch.from_numpy(np.array([[1, 0, 0], [0, -1, 0]], dtype=np.float32))
print(r)
print(quat_rotate(r, x))

# Random angle/axis rotation: rotating by rot then by its inverse must be
# the identity on a random (5, 6, 3) batch of vectors.
angle = torch.from_numpy(np.array(np.random.rand() * 10.0, dtype=np.float32))
axis = torch.from_numpy(
    np.array([1, np.random.rand() * 10.0, np.random.rand() * 10.0], dtype=np.float32),
)

print(repr(angle))
print(repr(axis))

rot = quat_from_angle_axis(angle, axis)
x = torch.from_numpy(np.random.rand(5, 6, 3))
y = quat_rotate(quat_inverse(rot), quat_rotate(rot, x))
print(x.numpy())
print(y.numpy())
assert np.allclose(x.numpy(), y.numpy())

# Build a transform from a rotation matrix + translation and apply it.
m = torch.from_numpy(np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]], dtype=np.float32))
r = quat_from_rotation_matrix(m)
t = torch.from_numpy(np.array([0, 1, 0], dtype=np.float32))
se3 = transform_from_rotation_translation(r=r, t=t)
print(se3)
print(transform_apply(se3, t))

# A transform composed with its own inverse should be the identity transform.
rot = quat_from_angle_axis(
    torch.from_numpy(np.array([45, -54], dtype=np.float32)),
    torch.from_numpy(np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32)),
    degree=True,
)
trans = torch.from_numpy(np.array([[1, 1, 0], [1, 1, 0]], dtype=np.float32))
transform = transform_from_rotation_translation(r=rot, t=trans)
t = transform_mul(transform, transform_inverse(transform))
gt = np.zeros((2, 7))
gt[:, 0] = 1.0
print(t.numpy())
print(gt)
# NOTE(review): transforms are laid out [qx, qy, qz, qw, tx, ty, tz], so the
# identity is (0, 0, 0, 1, 0, 0, 0); gt[:, 0] = 1.0 puts the 1 in the qx
# slot, which looks misplaced (w lives at index 3) — presumably why the
# assert below was left commented out.  Verify before re-enabling.
# assert np.allclose(t.numpy(), gt)

# Round-trip a 4x4 homogeneous matrix through euclidean_to_transform.
transform2 = torch.from_numpy(
    np.array(
        [[1, 0, 0, 1], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]], dtype=np.float32
    ),
)
transform2 = euclidean_to_transform(transform2)
print(transform2)

# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/__init__.py
# ================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/backend/__init__.py ================================================ ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/backend/fbx/__init__.py ================================================ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/backend/fbx/fbx_backend.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ This script reads an fbx file and returns the joint names, parents, and transforms. NOTE: It requires the Python FBX package to be installed. """ import sys import numpy as np try: import fbx import FbxCommon except ImportError as e: print("Error: FBX library failed to load - importing FBX data will not succeed. Message: {}".format(e)) print("FBX tools must be installed from https://help.autodesk.com/view/FBX/2020/ENU/?guid=FBX_Developer_Help_scripting_with_python_fbx_installing_python_fbx_html") def fbx_to_npy(file_name_in, root_joint_name, fps): """ This function reads in an fbx file, and saves the relevant info to a numpy array Fbx files have a series of animation curves, each of which has animations at different times. This script assumes that for mocap data, there is only one animation curve that contains all the joints. Otherwise it is unclear how to read in the data. If this condition isn't met, then the method throws an error :param file_name_in: str, file path in. Should be .fbx file :return: nothing, it just writes a file. 
""" # Create the fbx scene object and load the .fbx file fbx_sdk_manager, fbx_scene = FbxCommon.InitializeSdkObjects() FbxCommon.LoadScene(fbx_sdk_manager, fbx_scene, file_name_in) """ To read in the animation, we must find the root node of the skeleton. Unfortunately fbx files can have "scene parents" and other parts of the tree that are not joints As a crude fix, this reader just takes and finds the first thing which has an animation curve attached """ search_root = (root_joint_name is None or root_joint_name == "") # Get the root node of the skeleton, which is the child of the scene's root node possible_root_nodes = [fbx_scene.GetRootNode()] found_root_node = False max_key_count = 0 root_joint = None while len(possible_root_nodes) > 0: joint = possible_root_nodes.pop(0) if not search_root: if joint.GetName() == root_joint_name: root_joint = joint try: curve = _get_animation_curve(joint, fbx_scene) except RuntimeError: curve = None if curve is not None: key_count = curve.KeyGetCount() if key_count > max_key_count: found_root_node = True max_key_count = key_count root_curve = curve if search_root and not root_joint: root_joint = joint for child_index in range(joint.GetChildCount()): possible_root_nodes.append(joint.GetChild(child_index)) if not found_root_node: raise RuntimeError("No root joint found!! 
Exiting") joint_list, joint_names, parents = _get_skeleton(root_joint) """ Read in the transformation matrices of the animation, taking the scaling into account """ anim_range, frame_count, frame_rate = _get_frame_count(fbx_scene) local_transforms = [] #for frame in range(frame_count): time_sec = anim_range.GetStart().GetSecondDouble() time_range_sec = anim_range.GetStop().GetSecondDouble() - time_sec fbx_fps = frame_count / time_range_sec if fps != 120: fbx_fps = fps print("FPS: ", fbx_fps) while time_sec < anim_range.GetStop().GetSecondDouble(): fbx_time = fbx.FbxTime() fbx_time.SetSecondDouble(time_sec) fbx_time = fbx_time.GetFramedTime() transforms_current_frame = [] # Fbx has a unique time object which you need #fbx_time = root_curve.KeyGetTime(frame) for joint in joint_list: arr = np.array(_recursive_to_list(joint.EvaluateLocalTransform(fbx_time))) scales = np.array(_recursive_to_list(joint.EvaluateLocalScaling(fbx_time))) if not np.allclose(scales[0:3], scales[0]): raise ValueError( "Different X, Y and Z scaling. Unsure how this should be handled. " "To solve this, look at this link and try to upgrade the script " "http://help.autodesk.com/view/FBX/2017/ENU/?guid=__files_GUID_10CDD" "63C_79C1_4F2D_BB28_AD2BE65A02ED_htm" ) # Adjust the array for scaling arr /= scales[0] arr[3, 3] = 1.0 transforms_current_frame.append(arr) local_transforms.append(transforms_current_frame) time_sec += (1.0/fbx_fps) local_transforms = np.array(local_transforms) print("Frame Count: ", len(local_transforms)) return joint_names, parents, local_transforms, fbx_fps def _get_frame_count(fbx_scene): # Get the animation stacks and layers, in order to pull off animation curves later num_anim_stacks = fbx_scene.GetSrcObjectCount( FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimStack.ClassId) ) # if num_anim_stacks != 1: # raise RuntimeError( # "More than one animation stack was found. " # "This script must be modified to handle this case. 
Exiting" # ) if num_anim_stacks > 1: index = 1 else: index = 0 anim_stack = fbx_scene.GetSrcObject( FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimStack.ClassId), index ) anim_range = anim_stack.GetLocalTimeSpan() duration = anim_range.GetDuration() fps = duration.GetFrameRate(duration.GetGlobalTimeMode()) frame_count = duration.GetFrameCount(True) return anim_range, frame_count, fps def _get_animation_curve(joint, fbx_scene): # Get the animation stacks and layers, in order to pull off animation curves later num_anim_stacks = fbx_scene.GetSrcObjectCount( FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimStack.ClassId) ) # if num_anim_stacks != 1: # raise RuntimeError( # "More than one animation stack was found. " # "This script must be modified to handle this case. Exiting" # ) if num_anim_stacks > 1: index = 1 else: index = 0 anim_stack = fbx_scene.GetSrcObject( FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimStack.ClassId), index ) num_anim_layers = anim_stack.GetSrcObjectCount( FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimLayer.ClassId) ) if num_anim_layers != 1: raise RuntimeError( "More than one animation layer was found. " "This script must be modified to handle this case. 
Exiting" ) animation_layer = anim_stack.GetSrcObject( FbxCommon.FbxCriteria.ObjectType(FbxCommon.FbxAnimLayer.ClassId), 0 ) def _check_longest_curve(curve, max_curve_key_count): longest_curve = None if curve and curve.KeyGetCount() > max_curve_key_count[0]: max_curve_key_count[0] = curve.KeyGetCount() return True return False max_curve_key_count = [0] longest_curve = None for c in ["X", "Y", "Z"]: curve = joint.LclTranslation.GetCurve( animation_layer, c ) # sample curve for translation if _check_longest_curve(curve, max_curve_key_count): longest_curve = curve curve = joint.LclRotation.GetCurve( animation_layer, "X" ) if _check_longest_curve(curve, max_curve_key_count): longest_curve = curve return longest_curve def _get_skeleton(root_joint): # Do a depth first search of the skeleton to extract all the joints joint_list = [root_joint] joint_names = [root_joint.GetName()] parents = [-1] # -1 means no parent def append_children(joint, pos): """ Depth first search function :param joint: joint item in the fbx :param pos: position of current element (for parenting) :return: Nothing """ for child_index in range(joint.GetChildCount()): child = joint.GetChild(child_index) joint_list.append(child) joint_names.append(child.GetName()) parents.append(pos) append_children(child, len(parents) - 1) append_children(root_joint, 0) return joint_list, joint_names, parents def _recursive_to_list(array): """ Takes some iterable that might contain iterables and converts it to a list of lists [of lists... 
etc] Mainly used for converting the strange fbx wrappers for c++ arrays into python lists :param array: array to be converted :return: array converted to lists """ try: return float(array) except TypeError: return [_recursive_to_list(a) for a in array] def parse_fbx(file_name_in, root_joint_name, fps): return fbx_to_npy(file_name_in, root_joint_name, fps) ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/backend/fbx/fbx_read_wrapper.py ================================================ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. """ Script that reads in fbx files from python This requires a configs file, which contains the command necessary to switch conda environments to run the fbx reading script from python """ from ....core import logger import inspect import os import numpy as np from .fbx_backend import parse_fbx def fbx_to_array(fbx_file_path, root_joint, fps): """ Reads an fbx file to an array. :param fbx_file_path: str, file path to fbx :return: tuple with joint_names, parents, transforms, frame time """ # Ensure the file path is valid fbx_file_path = os.path.abspath(fbx_file_path) assert os.path.exists(fbx_file_path) # Parse FBX file joint_names, parents, local_transforms, fbx_fps = parse_fbx(fbx_file_path, root_joint, fps) return joint_names, parents, local_transforms, fbx_fps ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/skeleton/skeleton3d.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os import xml.etree.ElementTree as ET from collections import OrderedDict from typing import List, Optional, Type, Dict import numpy as np import torch from ..core import * from .backend.fbx.fbx_read_wrapper import fbx_to_array import scipy.ndimage.filters as filters class SkeletonTree(Serializable): """ A skeleton tree gives a complete description of a rigid skeleton. It describes a tree structure over a list of nodes with their names indicated by strings. 
Each edge in the tree has a local translation associated with it which describes the distance between the two nodes that it connects. Basic Usage: >>> t = SkeletonTree.from_mjcf(SkeletonTree.__example_mjcf_path__) >>> t SkeletonTree( node_names=['torso', 'front_left_leg', 'aux_1', 'front_left_foot', 'front_right_leg', 'aux_2', 'front_right_foot', 'left_back_leg', 'aux_3', 'left_back_foot', 'right_back_leg', 'aux_4', 'right_back_foot'], parent_indices=tensor([-1, 0, 1, 2, 0, 4, 5, 0, 7, 8, 0, 10, 11]), local_translation=tensor([[ 0.0000, 0.0000, 0.7500], [ 0.0000, 0.0000, 0.0000], [ 0.2000, 0.2000, 0.0000], [ 0.2000, 0.2000, 0.0000], [ 0.0000, 0.0000, 0.0000], [-0.2000, 0.2000, 0.0000], [-0.2000, 0.2000, 0.0000], [ 0.0000, 0.0000, 0.0000], [-0.2000, -0.2000, 0.0000], [-0.2000, -0.2000, 0.0000], [ 0.0000, 0.0000, 0.0000], [ 0.2000, -0.2000, 0.0000], [ 0.2000, -0.2000, 0.0000]]) ) >>> t.node_names ['torso', 'front_left_leg', 'aux_1', 'front_left_foot', 'front_right_leg', 'aux_2', 'front_right_foot', 'left_back_leg', 'aux_3', 'left_back_foot', 'right_back_leg', 'aux_4', 'right_back_foot'] >>> t.parent_indices tensor([-1, 0, 1, 2, 0, 4, 5, 0, 7, 8, 0, 10, 11]) >>> t.local_translation tensor([[ 0.0000, 0.0000, 0.7500], [ 0.0000, 0.0000, 0.0000], [ 0.2000, 0.2000, 0.0000], [ 0.2000, 0.2000, 0.0000], [ 0.0000, 0.0000, 0.0000], [-0.2000, 0.2000, 0.0000], [-0.2000, 0.2000, 0.0000], [ 0.0000, 0.0000, 0.0000], [-0.2000, -0.2000, 0.0000], [-0.2000, -0.2000, 0.0000], [ 0.0000, 0.0000, 0.0000], [ 0.2000, -0.2000, 0.0000], [ 0.2000, -0.2000, 0.0000]]) >>> t.parent_of('front_left_leg') 'torso' >>> t.index('front_right_foot') 6 >>> t[2] 'aux_1' """ __example_mjcf_path__ = os.path.join( os.path.dirname(os.path.realpath(__file__)), "tests/ant.xml" ) def __init__(self, node_names, parent_indices, local_translation): """ :param node_names: a list of names for each tree node :type node_names: List[str] :param parent_indices: an int32-typed tensor that represents the edge to its parent.\ 
-1 represents the root node :type parent_indices: Tensor :param local_translation: a 3d vector that gives local translation information :type local_translation: Tensor """ ln, lp, ll = len(node_names), len(parent_indices), len(local_translation) assert len(set((ln, lp, ll))) == 1 self._node_names = node_names self._parent_indices = parent_indices.long() self._local_translation = local_translation self._node_indices = {self.node_names[i]: i for i in range(len(self))} def __len__(self): """ number of nodes in the skeleton tree """ return len(self.node_names) def __iter__(self): """ iterator that iterate through the name of each node """ yield from self.node_names def __getitem__(self, item): """ get the name of the node given the index """ return self.node_names[item] def __repr__(self): return ( "SkeletonTree(\n node_names={},\n parent_indices={}," "\n local_translation={}\n)".format( self._indent(repr(self.node_names)), self._indent(repr(self.parent_indices)), self._indent(repr(self.local_translation)), ) ) def _indent(self, s): return "\n ".join(s.split("\n")) @property def node_names(self): return self._node_names @property def parent_indices(self): return self._parent_indices @property def local_translation(self): return self._local_translation @property def num_joints(self): """ number of nodes in the skeleton tree """ return len(self) @classmethod def from_dict(cls, dict_repr, *args, **kwargs): return cls( list(map(str, dict_repr["node_names"])), TensorUtils.from_dict(dict_repr["parent_indices"], *args, **kwargs), TensorUtils.from_dict(dict_repr["local_translation"], *args, **kwargs), ) def to_dict(self): return OrderedDict( [ ("node_names", self.node_names), ("parent_indices", tensor_to_dict(self.parent_indices)), ("local_translation", tensor_to_dict(self.local_translation)), ] ) @classmethod def from_mjcf(cls, path: str) -> "SkeletonTree": """ Parses a mujoco xml scene description file and returns a Skeleton Tree. 
We use the model attribute at the root as the name of the tree. :param path: :type path: string :return: The skeleton tree constructed from the mjcf file :rtype: SkeletonTree """ tree = ET.parse(path) xml_doc_root = tree.getroot() xml_world_body = xml_doc_root.find("worldbody") if xml_world_body is None: raise ValueError("MJCF parsed incorrectly please verify it.") # assume this is the root xml_body_root = xml_world_body.find("body") if xml_body_root is None: raise ValueError("MJCF parsed incorrectly please verify it.") node_names = [] parent_indices = [] local_translation = [] # recursively adding all nodes into the skel_tree def _add_xml_node(xml_node, parent_index, node_index): node_name = xml_node.attrib.get("name") # parse the local translation into float list pos = np.fromstring(xml_node.attrib.get("pos"), dtype=float, sep=" ") node_names.append(node_name) parent_indices.append(parent_index) local_translation.append(pos) curr_index = node_index node_index += 1 for next_node in xml_node.findall("body"): node_index = _add_xml_node(next_node, curr_index, node_index) return node_index _add_xml_node(xml_body_root, -1, 0) return cls( node_names, torch.from_numpy(np.array(parent_indices, dtype=np.int32)), torch.from_numpy(np.array(local_translation, dtype=np.float32)), ) def parent_of(self, node_name): """ get the name of the parent of the given node :param node_name: the name of the node :type node_name: string :rtype: string """ return self[int(self.parent_indices[self.index(node_name)].item())] def index(self, node_name): """ get the index of the node :param node_name: the name of the node :type node_name: string :rtype: int """ return self._node_indices[node_name] def drop_nodes_by_names( self, node_names: List[str], pairwise_translation=None ) -> "SkeletonTree": new_length = len(self) - len(node_names) new_node_names = [] new_local_translation = torch.zeros( new_length, 3, dtype=self.local_translation.dtype ) new_parent_indices = torch.zeros(new_length, 
dtype=self.parent_indices.dtype) parent_indices = self.parent_indices.numpy() new_node_indices: dict = {} new_node_index = 0 for node_index in range(len(self)): if self[node_index] in node_names: continue tb_node_index = parent_indices[node_index] if tb_node_index != -1: local_translation = self.local_translation[node_index, :] while tb_node_index != -1 and self[tb_node_index] in node_names: local_translation += self.local_translation[tb_node_index, :] tb_node_index = parent_indices[tb_node_index] assert tb_node_index != -1, "the root node cannot be dropped" if pairwise_translation is not None: local_translation = pairwise_translation[ tb_node_index, node_index, : ] else: local_translation = self.local_translation[node_index, :] new_node_names.append(self[node_index]) new_local_translation[new_node_index, :] = local_translation if tb_node_index == -1: new_parent_indices[new_node_index] = -1 else: new_parent_indices[new_node_index] = new_node_indices[ self[tb_node_index] ] new_node_indices[self[node_index]] = new_node_index new_node_index += 1 return SkeletonTree(new_node_names, new_parent_indices, new_local_translation) def keep_nodes_by_names( self, node_names: List[str], pairwise_translation=None ) -> "SkeletonTree": nodes_to_drop = list(filter(lambda x: x not in node_names, self)) return self.drop_nodes_by_names(nodes_to_drop, pairwise_translation) class SkeletonState(Serializable): """ A skeleton state contains all the information needed to describe a static state of a skeleton. It requires a skeleton tree, local/global rotation at each joint and the root translation. 
Example: >>> t = SkeletonTree.from_mjcf(SkeletonTree.__example_mjcf_path__) >>> zero_pose = SkeletonState.zero_pose(t) >>> plot_skeleton_state(zero_pose) # can be imported from `.visualization.common` [plot of the ant at zero pose >>> local_rotation = zero_pose.local_rotation.clone() >>> local_rotation[2] = torch.tensor([0, 0, 1, 0]) >>> new_pose = SkeletonState.from_rotation_and_root_translation( ... skeleton_tree=t, ... r=local_rotation, ... t=zero_pose.root_translation, ... is_local=True ... ) >>> new_pose.local_rotation tensor([[0., 0., 0., 1.], [0., 0., 0., 1.], [0., 1., 0., 0.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.]]) >>> plot_skeleton_state(new_pose) # you should be able to see one of ant's leg is bent [plot of the ant with the new pose >>> new_pose.global_rotation # the local rotation is propagated to the global rotation at joint #3 tensor([[0., 0., 0., 1.], [0., 0., 0., 1.], [0., 1., 0., 0.], [0., 1., 0., 0.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.]]) Global/Local Representation (cont. 
from the previous example) >>> new_pose.is_local True >>> new_pose.tensor # this will return the local rotation followed by the root translation tensor([0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0.]) >>> new_pose.tensor.shape # 4 * 13 (joint rotation) + 3 (root translatio torch.Size([55]) >>> new_pose.global_repr().is_local False >>> new_pose.global_repr().tensor # this will return the global rotation followed by the root translation instead tensor([0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0.]) >>> new_pose.global_repr().tensor.shape # 4 * 13 (joint rotation) + 3 (root translation torch.Size([55]) """ def __init__(self, tensor_backend, skeleton_tree, is_local): self._skeleton_tree = skeleton_tree self._is_local = is_local self.tensor = tensor_backend.clone() def __len__(self): return self.tensor.shape[0] @property def rotation(self): if not hasattr(self, "_rotation"): self._rotation = self.tensor[..., : self.num_joints * 4].reshape( *(self.tensor.shape[:-1] + (self.num_joints, 4)) ) return self._rotation @property def _local_rotation(self): if self._is_local: return self.rotation else: return None @property def _global_rotation(self): if not self._is_local: return self.rotation else: return None @property def is_local(self): """ is the rotation represented in local frame? 
:rtype: bool """ return self._is_local @property def invariant_property(self): return {"skeleton_tree": self.skeleton_tree, "is_local": self.is_local} @property def num_joints(self): """ number of joints in the skeleton tree :rtype: int """ return self.skeleton_tree.num_joints @property def skeleton_tree(self): """ skeleton tree :rtype: SkeletonTree """ return self._skeleton_tree @property def root_translation(self): """ root translation :rtype: Tensor """ if not hasattr(self, "_root_translation"): self._root_translation = self.tensor[ ..., self.num_joints * 4 : self.num_joints * 4 + 3 ] return self._root_translation @property def global_transformation(self): """ global transformation of each joint (transform from joint frame to global frame) """ if not hasattr(self, "_global_transformation"): local_transformation = self.local_transformation global_transformation = [] parent_indices = self.skeleton_tree.parent_indices.numpy() # global_transformation = local_transformation.identity_like() for node_index in range(len(self.skeleton_tree)): parent_index = parent_indices[node_index] if parent_index == -1: global_transformation.append( local_transformation[..., node_index, :] ) else: global_transformation.append( transform_mul( global_transformation[parent_index], local_transformation[..., node_index, :], ) ) self._global_transformation = torch.stack(global_transformation, axis=-2) return self._global_transformation @property def global_rotation(self): """ global rotation of each joint (rotation matrix to rotate from joint's F.O.R to global F.O.R) """ if self._global_rotation is None: if not hasattr(self, "_comp_global_rotation"): self._comp_global_rotation = transform_rotation( self.global_transformation ) return self._comp_global_rotation else: return self._global_rotation @property def global_translation(self): """ global translation of each joint """ if not hasattr(self, "_global_translation"): self._global_translation = 
transform_translation(self.global_transformation) return self._global_translation @property def global_translation_xy(self): """ global translation in xy """ trans_xy_data = self.global_translation.zeros_like() trans_xy_data[..., 0:2] = self.global_translation[..., 0:2] return trans_xy_data @property def global_translation_xz(self): """ global translation in xz """ trans_xz_data = self.global_translation.zeros_like() trans_xz_data[..., 0:1] = self.global_translation[..., 0:1] trans_xz_data[..., 2:3] = self.global_translation[..., 2:3] return trans_xz_data @property def local_rotation(self): """ the rotation from child frame to parent frame given in the order of child nodes appeared in `.skeleton_tree.node_names` """ if self._local_rotation is None: if not hasattr(self, "_comp_local_rotation"): local_rotation = quat_identity_like(self.global_rotation) for node_index in range(len(self.skeleton_tree)): parent_index = self.skeleton_tree.parent_indices[node_index] if parent_index == -1: local_rotation[..., node_index, :] = self.global_rotation[ ..., node_index, : ] else: local_rotation[..., node_index, :] = quat_mul_norm( quat_inverse(self.global_rotation[..., parent_index, :]), self.global_rotation[..., node_index, :], ) self._comp_local_rotation = local_rotation return self._comp_local_rotation else: return self._local_rotation @property def local_transformation(self): """ local translation + local rotation. It describes the transformation from child frame to parent frame given in the order of child nodes appeared in `.skeleton_tree.node_names` """ if not hasattr(self, "_local_transformation"): self._local_transformation = transform_from_rotation_translation( r=self.local_rotation, t=self.local_translation ) return self._local_transformation @property def local_translation(self): """ local translation of the skeleton state. It is identical to the local translation in `.skeleton_tree.local_translation` except the root translation. 
The root translation is identical to `.root_translation` """ if not hasattr(self, "_local_translation"): broadcast_shape = ( tuple(self.tensor.shape[:-1]) + (len(self.skeleton_tree),) + tuple(self.skeleton_tree.local_translation.shape[-1:]) ) local_translation = self.skeleton_tree.local_translation.broadcast_to( *broadcast_shape ).clone() local_translation[..., 0, :] = self.root_translation self._local_translation = local_translation return self._local_translation # Root Properties @property def root_translation_xy(self): """ root translation on xy """ if not hasattr(self, "_root_translation_xy"): self._root_translation_xy = self.global_translation_xy[..., 0, :] return self._root_translation_xy @property def global_root_rotation(self): """ root rotation """ if not hasattr(self, "_global_root_rotation"): self._global_root_rotation = self.global_rotation[..., 0, :] return self._global_root_rotation @property def global_root_yaw_rotation(self): """ root yaw rotation """ if not hasattr(self, "_global_root_yaw_rotation"): self._global_root_yaw_rotation = self.global_root_rotation.yaw_rotation() return self._global_root_yaw_rotation # Properties relative to root @property def local_translation_to_root(self): """ The 3D translation from joint frame to the root frame. """ if not hasattr(self, "_local_translation_to_root"): self._local_translation_to_root = ( self.global_translation - self.root_translation.unsqueeze(-1) ) return self._local_translation_to_root @property def local_rotation_to_root(self): """ The 3D rotation from joint frame to the root frame. 
It is equivalent to The root_R_world * world_R_node """ return ( quat_inverse(self.global_root_rotation).unsqueeze(-1) * self.global_rotation ) def compute_forward_vector( self, left_shoulder_index, right_shoulder_index, left_hip_index, right_hip_index, gaussian_filter_width=20, ): """ Computes forward vector based on cross product of the up vector with average of the right->left shoulder and hip vectors """ global_positions = self.global_translation # Perpendicular to the forward direction. # Uses the shoulders and hips to find this. side_direction = ( global_positions[:, left_shoulder_index].numpy() - global_positions[:, right_shoulder_index].numpy() + global_positions[:, left_hip_index].numpy() - global_positions[:, right_hip_index].numpy() ) side_direction = ( side_direction / np.sqrt((side_direction ** 2).sum(axis=-1))[..., np.newaxis] ) # Forward direction obtained by crossing with the up direction. forward_direction = np.cross(side_direction, np.array([[0, 1, 0]])) # Smooth the forward direction with a Gaussian. # Axis 0 is the time/frame axis. 
forward_direction = filters.gaussian_filter1d( forward_direction, gaussian_filter_width, axis=0, mode="nearest" ) forward_direction = ( forward_direction / np.sqrt((forward_direction ** 2).sum(axis=-1))[..., np.newaxis] ) return torch.from_numpy(forward_direction) @staticmethod def _to_state_vector(rot, rt): state_shape = rot.shape[:-2] vr = rot.reshape(*(state_shape + (-1,))) vt = rt.broadcast_to(*state_shape + rt.shape[-1:]).reshape( *(state_shape + (-1,)) ) v = torch.cat([vr, vt], axis=-1) return v @classmethod def from_dict( cls: Type["SkeletonState"], dict_repr: OrderedDict, *args, **kwargs ) -> "SkeletonState": rot = TensorUtils.from_dict(dict_repr["rotation"], *args, **kwargs) rt = TensorUtils.from_dict(dict_repr["root_translation"], *args, **kwargs) return cls( SkeletonState._to_state_vector(rot, rt), SkeletonTree.from_dict(dict_repr["skeleton_tree"], *args, **kwargs), dict_repr["is_local"], ) def to_dict(self) -> OrderedDict: return OrderedDict( [ ("rotation", tensor_to_dict(self.rotation)), ("root_translation", tensor_to_dict(self.root_translation)), ("skeleton_tree", self.skeleton_tree.to_dict()), ("is_local", self.is_local), ] ) @classmethod def from_rotation_and_root_translation(cls, skeleton_tree, r, t, is_local=True): """ Construct a skeleton state from rotation and root translation :param skeleton_tree: the skeleton tree :type skeleton_tree: SkeletonTree :param r: rotation (either global or local) :type r: Tensor :param t: root translation :type t: Tensor :param is_local: to indicate that whether the rotation is local or global :type is_local: bool, optional, default=True """ assert ( r.dim() > 0 ), "the rotation needs to have at least 1 dimension (dim = {})".format(r.dim) return cls( SkeletonState._to_state_vector(r, t), skeleton_tree=skeleton_tree, is_local=is_local, ) @classmethod def zero_pose(cls, skeleton_tree): """ Construct a zero-pose skeleton state from the skeleton tree by assuming that all the local rotation is 0 and root translation is 
also 0. :param skeleton_tree: the skeleton tree as the rigid body :type skeleton_tree: SkeletonTree """ return cls.from_rotation_and_root_translation( skeleton_tree=skeleton_tree, r=quat_identity([skeleton_tree.num_joints]), t=torch.zeros(3, dtype=skeleton_tree.local_translation.dtype), is_local=True, ) def local_repr(self): """ Convert the skeleton state into local representation. This will only affects the values of .tensor. If the skeleton state already has `is_local=True`. This method will do nothing. :rtype: SkeletonState """ if self.is_local: return self return SkeletonState.from_rotation_and_root_translation( self.skeleton_tree, r=self.local_rotation, t=self.root_translation, is_local=True, ) def global_repr(self): """ Convert the skeleton state into global representation. This will only affects the values of .tensor. If the skeleton state already has `is_local=False`. This method will do nothing. :rtype: SkeletonState """ if not self.is_local: return self return SkeletonState.from_rotation_and_root_translation( self.skeleton_tree, r=self.global_rotation, t=self.root_translation, is_local=False, ) def _get_pairwise_average_translation(self): global_transform_inv = transform_inverse(self.global_transformation) p1 = global_transform_inv.unsqueeze(-2) p2 = self.global_transformation.unsqueeze(-3) pairwise_translation = ( transform_translation(transform_mul(p1, p2)) .reshape(-1, len(self.skeleton_tree), len(self.skeleton_tree), 3) .mean(axis=0) ) return pairwise_translation def _transfer_to(self, new_skeleton_tree: SkeletonTree): old_indices = list(map(self.skeleton_tree.index, new_skeleton_tree)) return SkeletonState.from_rotation_and_root_translation( new_skeleton_tree, r=self.global_rotation[..., old_indices, :], t=self.root_translation, is_local=False, ) def drop_nodes_by_names( self, node_names: List[str], estimate_local_translation_from_states: bool = True ) -> "SkeletonState": """ Drop a list of nodes from the skeleton and re-compute the local rotation to 
match the original joint position as much as possible. :param node_names: a list node names that specifies the nodes need to be dropped :type node_names: List of strings :param estimate_local_translation_from_states: the boolean indicator that specifies whether\ or not to re-estimate the local translation from the states (avg.) :type estimate_local_translation_from_states: boolean :rtype: SkeletonState """ if estimate_local_translation_from_states: pairwise_translation = self._get_pairwise_average_translation() else: pairwise_translation = None new_skeleton_tree = self.skeleton_tree.drop_nodes_by_names( node_names, pairwise_translation ) return self._transfer_to(new_skeleton_tree) def keep_nodes_by_names( self, node_names: List[str], estimate_local_translation_from_states: bool = True ) -> "SkeletonState": """ Keep a list of nodes and drop all other nodes from the skeleton and re-compute the local rotation to match the original joint position as much as possible. :param node_names: a list node names that specifies the nodes need to be dropped :type node_names: List of strings :param estimate_local_translation_from_states: the boolean indicator that specifies whether\ or not to re-estimate the local translation from the states (avg.) 
:type estimate_local_translation_from_states: boolean :rtype: SkeletonState """ return self.drop_nodes_by_names( list(filter(lambda x: (x not in node_names), self)), estimate_local_translation_from_states, ) def _remapped_to( self, joint_mapping: Dict[str, str], target_skeleton_tree: SkeletonTree ): joint_mapping_inv = {target: source for source, target in joint_mapping.items()} reduced_target_skeleton_tree = target_skeleton_tree.keep_nodes_by_names( list(joint_mapping_inv) ) n_joints = ( len(joint_mapping), len(self.skeleton_tree), len(reduced_target_skeleton_tree), ) assert ( len(set(n_joints)) == 1 ), "the joint mapping is not consistent with the skeleton trees" source_indices = list( map( lambda x: self.skeleton_tree.index(joint_mapping_inv[x]), reduced_target_skeleton_tree, ) ) target_local_rotation = self.local_rotation[..., source_indices, :] return SkeletonState.from_rotation_and_root_translation( skeleton_tree=reduced_target_skeleton_tree, r=target_local_rotation, t=self.root_translation, is_local=True, ) def retarget_to( self, joint_mapping: Dict[str, str], source_tpose_local_rotation, source_tpose_root_translation: np.ndarray, target_skeleton_tree: SkeletonTree, target_tpose_local_rotation, target_tpose_root_translation: np.ndarray, rotation_to_target_skeleton, scale_to_target_skeleton: float, z_up: bool = True, ) -> "SkeletonState": """ Retarget the skeleton state to a target skeleton tree. This is a naive retarget implementation with rough approximations. The function follows the procedures below. Steps: 1. Drop the joints from the source (self) that do not belong to the joint mapping\ with an implementation that is similar to "keep_nodes_by_names()" - take a\ look at the function doc for more details (same for source_tpose) 2. Rotate the source state and the source tpose by "rotation_to_target_skeleton"\ to align the source with the target orientation 3. Extract the root translation and normalize it to match the scale of the target\ skeleton 4. 
            Extract the global rotation from source state relative to source tpose and\
        re-apply the relative rotation to the target tpose to construct the global\
        rotation after retargetting

            5. Combine the computed global rotation and the root translation from 3 and 4 to\
        complete the retargeting.

            6. Make feet on the ground (global translation z)

        :param joint_mapping: a dictionary of that maps the joint node from the source skeleton to \
        the target skeleton
        :type joint_mapping: Dict[str, str]

        :param source_tpose_local_rotation: the local rotation of the source skeleton
        :type source_tpose_local_rotation: Tensor

        :param source_tpose_root_translation: the root translation of the source tpose
        :type source_tpose_root_translation: np.ndarray

        :param target_skeleton_tree: the target skeleton tree
        :type target_skeleton_tree: SkeletonTree

        :param target_tpose_local_rotation: the local rotation of the target skeleton
        :type target_tpose_local_rotation: Tensor

        :param target_tpose_root_translation: the root translation of the target tpose
        :type target_tpose_root_translation: Tensor

        :param rotation_to_target_skeleton: the rotation that needs to be applied to the source\
        skeleton to align with the target skeleton. Essentially the rotation is t_R_s, where t is\
        the frame of reference of the target skeleton and s is the frame of reference of the source\
        skeleton
        :type rotation_to_target_skeleton: Tensor

        :param scale_to_target_skeleton: the factor that needs to be multiplied from source\
        skeleton to target skeleton (unit in distance). For example, to go from `cm` to `m`, the \
        factor needs to be 0.01.
        :type scale_to_target_skeleton: float
        :rtype: SkeletonState
        """
        # STEP 0: Preprocess -- wrap both t-poses as SkeletonState objects so the
        # same transfer/remap machinery can be applied to them.
        source_tpose = SkeletonState.from_rotation_and_root_translation(
            skeleton_tree=self.skeleton_tree,
            r=source_tpose_local_rotation,
            t=source_tpose_root_translation,
            is_local=True,
        )

        target_tpose = SkeletonState.from_rotation_and_root_translation(
            skeleton_tree=target_skeleton_tree,
            r=target_tpose_local_rotation,
            t=target_tpose_root_translation,
            is_local=True,
        )

        # STEP 1: Drop the irrelevant joints -- keep only the joints named in the
        # mapping, then remap state and t-pose onto the target joint ordering.
        pairwise_translation = self._get_pairwise_average_translation()
        node_names = list(joint_mapping)
        new_skeleton_tree = self.skeleton_tree.keep_nodes_by_names(
            node_names, pairwise_translation
        )

        # TODO: combine the following steps before STEP 3
        source_tpose = source_tpose._transfer_to(new_skeleton_tree)
        source_state = self._transfer_to(new_skeleton_tree)

        source_tpose = source_tpose._remapped_to(joint_mapping, target_skeleton_tree)
        source_state = source_state._remapped_to(joint_mapping, target_skeleton_tree)

        # STEP 2: Rotate the source to align with the target -- the alignment
        # rotation is premultiplied onto the root joint only (index 0) and the
        # root translation is rotated accordingly.
        new_local_rotation = source_tpose.local_rotation.clone()
        new_local_rotation[..., 0, :] = quat_mul_norm(
            rotation_to_target_skeleton, source_tpose.local_rotation[..., 0, :]
        )

        source_tpose = SkeletonState.from_rotation_and_root_translation(
            skeleton_tree=source_tpose.skeleton_tree,
            r=new_local_rotation,
            t=quat_rotate(rotation_to_target_skeleton, source_tpose.root_translation),
            is_local=True,
        )

        new_local_rotation = source_state.local_rotation.clone()
        new_local_rotation[..., 0, :] = quat_mul_norm(
            rotation_to_target_skeleton, source_state.local_rotation[..., 0, :]
        )
        source_state = SkeletonState.from_rotation_and_root_translation(
            skeleton_tree=source_state.skeleton_tree,
            r=new_local_rotation,
            t=quat_rotate(rotation_to_target_skeleton, source_state.root_translation),
            is_local=True,
        )

        # STEP 3: Normalize to match the target scale -- only the root-translation
        # *offset* from the t-pose is scaled; the target t-pose supplies the base.
        root_translation_diff = (
            source_state.root_translation - source_tpose.root_translation
        ) * scale_to_target_skeleton
        # STEP 4: the global rotation from source state relative to source tpose and
        # re-apply to the target
        current_skeleton_tree = source_state.skeleton_tree
        # Build the target t-pose global rotations in *source* joint order; joints
        # missing from the target tree keep the source-state rotation as fallback.
        target_tpose_global_rotation = source_state.global_rotation[0, :].clone()
        for current_index, name in enumerate(current_skeleton_tree):
            if name in target_tpose.skeleton_tree:
                target_tpose_global_rotation[
                    current_index, :
                ] = target_tpose.global_rotation[
                    target_tpose.skeleton_tree.index(name), :
                ]

        # Relative rotation of each joint w.r.t. its t-pose, then re-applied to
        # the target t-pose: new = (state * tpose^-1) * target_tpose.
        global_rotation_diff = quat_mul_norm(
            source_state.global_rotation, quat_inverse(source_tpose.global_rotation)
        )
        new_global_rotation = quat_mul_norm(
            global_rotation_diff, target_tpose_global_rotation
        )

        # STEP 5: Putting 3 and 4 together -- expand onto the full target tree;
        # target joints with no mapped source joint inherit the rotation of their
        # closest mapped ancestor (walk up via parent_of until a match).
        current_skeleton_tree = source_state.skeleton_tree
        shape = source_state.global_rotation.shape[:-1]
        shape = shape[:-1] + target_tpose.global_rotation.shape[-2:-1]
        new_global_rotation_output = quat_identity(shape)
        for current_index, name in enumerate(target_skeleton_tree):
            while name not in current_skeleton_tree:
                name = target_skeleton_tree.parent_of(name)
            parent_index = current_skeleton_tree.index(name)
            new_global_rotation_output[:, current_index, :] = new_global_rotation[
                :, parent_index, :
            ]

        source_state = SkeletonState.from_rotation_and_root_translation(
            skeleton_tree=target_skeleton_tree,
            r=new_global_rotation_output,
            t=target_tpose.root_translation + root_translation_diff,
            is_local=False,
        ).local_repr()

        return source_state

    def retarget_to_by_tpose(
        self,
        joint_mapping: Dict[str, str],
        source_tpose: "SkeletonState",
        target_tpose: "SkeletonState",
        rotation_to_target_skeleton,
        scale_to_target_skeleton: float,
    ) -> "SkeletonState":
        """
        Retarget the skeleton state to a target skeleton tree. This is a naive
        retarget implementation with rough approximations.

        See the method `retarget_to()` for the full description of the procedure.

        :param joint_mapping: maps joint names from the source skeleton to the target skeleton
        :type joint_mapping: Dict[str, str]
        :param source_tpose: t-pose of the source skeleton
        :type source_tpose: SkeletonState
        :param target_tpose: t-pose of the target skeleton
        :type target_tpose: SkeletonState
        :param rotation_to_target_skeleton: rotation t_R_s aligning the source frame\
        with the target frame
        :type rotation_to_target_skeleton: Tensor
        :param scale_to_target_skeleton: distance-unit scale factor from source to\
        target (e.g. 0.01 to go from `cm` to `m`)
        :type scale_to_target_skeleton: float
        :rtype: SkeletonState
        """
        assert (
            len(source_tpose.shape) == 0 and len(target_tpose.shape) == 0
        ), "the retargeting script currently doesn't support vectorized operations"
        return self.retarget_to(
            joint_mapping,
            source_tpose.local_rotation,
            source_tpose.root_translation,
            target_tpose.skeleton_tree,
            target_tpose.local_rotation,
            target_tpose.root_translation,
            rotation_to_target_skeleton,
            scale_to_target_skeleton,
        )


class SkeletonMotion(SkeletonState):
    # A SkeletonState sequence augmented with per-joint global linear and angular
    # velocities plus a frame rate; velocities are appended to the flat tensor.

    def __init__(self, tensor_backend, skeleton_tree, is_local, fps, *args, **kwargs):
        self._fps = fps
        super().__init__(tensor_backend, skeleton_tree, is_local, *args, **kwargs)

    def clone(self):
        return SkeletonMotion(
            self.tensor.clone(), self.skeleton_tree, self._is_local, self._fps
        )

    @property
    def invariant_property(self):
        # Everything (besides the tensor itself) needed to reconstruct the motion.
        return {
            "skeleton_tree": self.skeleton_tree,
            "is_local": self.is_local,
            "fps": self.fps,
        }

    @property
    def global_velocity(self):
        """ global velocity """
        # Flat tensor layout: rotations (4*J), root translation (3), then linear
        # velocities (3*J) -- slice out the linear-velocity segment.
        curr_index = self.num_joints * 4 + 3
        return self.tensor[..., curr_index : curr_index + self.num_joints * 3].reshape(
            *(self.tensor.shape[:-1] + (self.num_joints, 3))
        )

    @property
    def global_angular_velocity(self):
        """ global angular velocity """
        # Angular velocities follow the linear ones: offset 4*J + 3 + 3*J = 7*J + 3.
        curr_index = self.num_joints * 7 + 3
        return self.tensor[..., curr_index : curr_index + self.num_joints * 3].reshape(
            *(self.tensor.shape[:-1] + (self.num_joints, 3))
        )

    @property
    def fps(self):
        """ number of frames per second """
        return self._fps

    @property
    def time_delta(self):
        """ time between two adjacent frames """
        return 1.0 / self.fps

    @property
    def global_root_velocity(self):
        """ global root velocity """
        return self.global_velocity[..., 0, :]

    @property
    def global_root_angular_velocity(self):
        """ global root angular velocity """
        return self.global_angular_velocity[..., 0, :]

    @classmethod
    def from_state_vector_and_velocity(
        cls,
        skeleton_tree,
        state_vector,
        global_velocity,
        global_angular_velocity,
        is_local,
        fps,
    ):
        """
        Construct a skeleton motion from a skeleton state vector plus the global
        velocity and angular velocity at each joint.

        :param skeleton_tree: the skeleton tree that the motion is based on
        :param state_vector: the state vector from the skeleton state by `.tensor`
        :param global_velocity: the global velocity at each joint
        :param global_angular_velocity: the global angular velocity at each joint
        :param is_local: whether the rotation in the state vector is in local frame
        :param fps: number of frames per second
        :rtype: SkeletonMotion
        """
        state_shape = state_vector.shape[:-1]
        # Flatten the per-joint velocities and append them to the state vector.
        v = global_velocity.reshape(*(state_shape + (-1,)))
        av = global_angular_velocity.reshape(*(state_shape + (-1,)))
        new_state_vector = torch.cat([state_vector, v, av], axis=-1)
        return cls(
            new_state_vector,
            skeleton_tree=skeleton_tree,
            is_local=is_local,
            fps=fps,
        )

    @classmethod
    def from_skeleton_state(
        cls: Type["SkeletonMotion"], skeleton_state: SkeletonState, fps: int
    ):
        """
        Construct a skeleton motion from a skeleton state. The velocities are
        estimated using second order gaussian filter along the last axis. The
        skeleton state must have at least .dim >= 1

        :param skeleton_state: the skeleton state that the motion is based on
        :type skeleton_state: SkeletonState
        :param fps: number of frames per second
        :type fps: int
        :rtype: SkeletonMotion
        """
        # NOTE(review): exact type check -- a SkeletonMotion (subclass) input is
        # rejected here as well; presumably it already carries velocities.
        assert (
            type(skeleton_state) == SkeletonState
        ), "expected type of {}, got {}".format(SkeletonState, type(skeleton_state))
        global_velocity = SkeletonMotion._compute_velocity(
            p=skeleton_state.global_translation, time_delta=1 / fps
        )
        global_angular_velocity = SkeletonMotion._compute_angular_velocity(
            r=skeleton_state.global_rotation, time_delta=1 / fps
        )
        return cls.from_state_vector_and_velocity(
            skeleton_tree=skeleton_state.skeleton_tree,
            state_vector=skeleton_state.tensor,
            global_velocity=global_velocity,
            global_angular_velocity=global_angular_velocity,
            is_local=skeleton_state.is_local,
            fps=fps,
        )

    @staticmethod
    def _to_state_vector(rot, rt, vel, avel):
        # Serialize rotation + root translation via the parent helper, then
        # append the flattened velocities.
        state_shape = rot.shape[:-2]
        skeleton_state_v = SkeletonState._to_state_vector(rot, rt)
        v = vel.reshape(*(state_shape + (-1,)))
        av = avel.reshape(*(state_shape + (-1,)))
        skeleton_motion_v = torch.cat([skeleton_state_v, v, av], axis=-1)
        return skeleton_motion_v

    @classmethod
    def from_dict(
        cls: Type["SkeletonMotion"], dict_repr: OrderedDict, *args, **kwargs
    ) -> "SkeletonMotion":
        # Inverse of `to_dict`.
        rot = TensorUtils.from_dict(dict_repr["rotation"], *args, **kwargs)
        rt = TensorUtils.from_dict(dict_repr["root_translation"], *args, **kwargs)
        vel = TensorUtils.from_dict(dict_repr["global_velocity"], *args, **kwargs)
        avel = TensorUtils.from_dict(
            dict_repr["global_angular_velocity"], *args, **kwargs
        )
        return cls(
            SkeletonMotion._to_state_vector(rot, rt, vel, avel),
            skeleton_tree=SkeletonTree.from_dict(
                dict_repr["skeleton_tree"], *args, **kwargs
            ),
            is_local=dict_repr["is_local"],
            fps=dict_repr["fps"],
        )

    def to_dict(self) -> OrderedDict:
        # Serializable representation; see `from_dict` for the inverse.
        return OrderedDict(
            [
                ("rotation", tensor_to_dict(self.rotation)),
                ("root_translation", tensor_to_dict(self.root_translation)),
                ("global_velocity", tensor_to_dict(self.global_velocity)),
                ("global_angular_velocity", tensor_to_dict(self.global_angular_velocity)),
                ("skeleton_tree", self.skeleton_tree.to_dict()),
                ("is_local", self.is_local),
                ("fps", self.fps),
            ]
        )

    @classmethod
    def from_fbx(
        cls: Type["SkeletonMotion"],
        fbx_file_path,
        skeleton_tree=None,
        is_local=True,
        fps=120,
        root_joint="",
        root_trans_index=0,
        *args,
        **kwargs,
    ) -> "SkeletonMotion":
        """
        Construct a skeleton motion from a fbx file (TODO - generalize this). If
        the skeleton tree is not given, it will use the first frame of the mocap
        to construct the skeleton tree.

        :param fbx_file_path: the path of the fbx file
        :param skeleton_tree: optional skeleton tree that the rotation is applied to
        :param is_local: whether the state vector uses local or global rotation
        :param fps: FPS of the FBX animation (default 120)
        :param root_joint: name of the root joint ("" = first animated node in the scene)
        :param root_trans_index: index of the joint to extract the root transform from
        :rtype: SkeletonMotion
        """
        joint_names, joint_parents, transforms, fps = fbx_to_array(
            fbx_file_path, root_joint, fps
        )
        # swap the last two axis to match the convention
        local_transform = euclidean_to_transform(
            transformation_matrix=torch.from_numpy(
                np.swapaxes(np.array(transforms), -1, -2),
            ).float()
        )
        local_rotation = transform_rotation(local_transform)
        root_translation = transform_translation(local_transform)[..., root_trans_index, :]
joint_parents = torch.from_numpy(np.array(joint_parents)).int() if skeleton_tree is None: local_translation = transform_translation(local_transform).reshape( -1, len(joint_parents), 3 )[0] skeleton_tree = SkeletonTree(joint_names, joint_parents, local_translation) skeleton_state = SkeletonState.from_rotation_and_root_translation( skeleton_tree, r=local_rotation, t=root_translation, is_local=True ) if not is_local: skeleton_state = skeleton_state.global_repr() return cls.from_skeleton_state( skeleton_state=skeleton_state, fps=fps ) @staticmethod def _compute_velocity(p, time_delta, guassian_filter=True): velocity = torch.from_numpy( filters.gaussian_filter1d( np.gradient(p.numpy(), axis=-3), 2, axis=-3, mode="nearest" ) / time_delta, ) return velocity @staticmethod def _compute_angular_velocity(r, time_delta: float, guassian_filter=True): # assume the second last dimension is the time axis diff_quat_data = quat_identity_like(r) diff_quat_data[..., :-1, :, :] = quat_mul_norm( r[..., 1:, :, :], quat_inverse(r[..., :-1, :, :]) ) diff_angle, diff_axis = quat_angle_axis(diff_quat_data) angular_velocity = diff_axis * diff_angle.unsqueeze(-1) / time_delta angular_velocity = torch.from_numpy( filters.gaussian_filter1d( angular_velocity.numpy(), 2, axis=-3, mode="nearest" ), ) return angular_velocity def crop(self, start: int, end: int, fps: Optional[int] = None): """ Crop the motion along its last axis. This is equivalent to performing a slicing on the object with [..., start: end: skip_every] where skip_every = old_fps / fps. Note that the new fps provided must be a factor of the original fps. 
:param start: the beginning frame index :type start: int :param end: the ending frame index :type end: int :param fps: number of frames per second in the output (if not given the original fps will be used) :type fps: int, optional :rtype: SkeletonMotion """ if fps is None: new_fps = int(self.fps) old_fps = int(self.fps) else: new_fps = int(fps) old_fps = int(self.fps) assert old_fps % fps == 0, ( "the resampling doesn't support fps with non-integer division " "from the original fps: {} => {}".format(old_fps, fps) ) skip_every = old_fps // new_fps return SkeletonMotion.from_skeleton_state( SkeletonState.from_rotation_and_root_translation( skeleton_tree=self.skeleton_tree, t=self.root_translation[start:end:skip_every], r=self.local_rotation[start:end:skip_every], is_local=True ), fps=self.fps ) def retarget_to( self, joint_mapping: Dict[str, str], source_tpose_local_rotation, source_tpose_root_translation: np.ndarray, target_skeleton_tree: "SkeletonTree", target_tpose_local_rotation, target_tpose_root_translation: np.ndarray, rotation_to_target_skeleton, scale_to_target_skeleton: float, z_up: bool = True, ) -> "SkeletonMotion": """ Same as the one in :class:`SkeletonState`. This method discards all velocity information before retargeting and re-estimate the velocity after the retargeting. The same fps is used in the new retargetted motion. 
:param joint_mapping: a dictionary of that maps the joint node from the source skeleton to \ the target skeleton :type joint_mapping: Dict[str, str] :param source_tpose_local_rotation: the local rotation of the source skeleton :type source_tpose_local_rotation: Tensor :param source_tpose_root_translation: the root translation of the source tpose :type source_tpose_root_translation: np.ndarray :param target_skeleton_tree: the target skeleton tree :type target_skeleton_tree: SkeletonTree :param target_tpose_local_rotation: the local rotation of the target skeleton :type target_tpose_local_rotation: Tensor :param target_tpose_root_translation: the root translation of the target tpose :type target_tpose_root_translation: Tensor :param rotation_to_target_skeleton: the rotation that needs to be applied to the source\ skeleton to align with the target skeleton. Essentially the rotation is t_R_s, where t is\ the frame of reference of the target skeleton and s is the frame of reference of the source\ skeleton :type rotation_to_target_skeleton: Tensor :param scale_to_target_skeleton: the factor that needs to be multiplied from source\ skeleton to target skeleton (unit in distance). For example, to go from `cm` to `m`, the \ factor needs to be 0.01. :type scale_to_target_skeleton: float :rtype: SkeletonMotion """ return SkeletonMotion.from_skeleton_state( super().retarget_to( joint_mapping, source_tpose_local_rotation, source_tpose_root_translation, target_skeleton_tree, target_tpose_local_rotation, target_tpose_root_translation, rotation_to_target_skeleton, scale_to_target_skeleton, z_up, ), self.fps, ) def retarget_to_by_tpose( self, joint_mapping: Dict[str, str], source_tpose: "SkeletonState", target_tpose: "SkeletonState", rotation_to_target_skeleton, scale_to_target_skeleton: float, z_up: bool = True, ) -> "SkeletonMotion": """ Same as the one in :class:`SkeletonState`. 
This method discards all velocity information before retargeting and re-estimate the velocity after the retargeting. The same fps is used in the new retargetted motion. :param joint_mapping: a dictionary of that maps the joint node from the source skeleton to \ the target skeleton :type joint_mapping: Dict[str, str] :param source_tpose: t-pose of the source skeleton :type source_tpose: SkeletonState :param target_tpose: t-pose of the target skeleton :type target_tpose: SkeletonState :param rotation_to_target_skeleton: the rotation that needs to be applied to the source\ skeleton to align with the target skeleton. Essentially the rotation is t_R_s, where t is\ the frame of reference of the target skeleton and s is the frame of reference of the source\ skeleton :type rotation_to_target_skeleton: Tensor :param scale_to_target_skeleton: the factor that needs to be multiplied from source\ skeleton to target skeleton (unit in distance). For example, to go from `cm` to `m`, the \ factor needs to be 0.01. :type scale_to_target_skeleton: float :rtype: SkeletonMotion """ return self.retarget_to( joint_mapping, source_tpose.local_rotation, source_tpose.root_translation, target_tpose.skeleton_tree, target_tpose.local_rotation, target_tpose.root_translation, rotation_to_target_skeleton, scale_to_target_skeleton, z_up, ) ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/__init__.py ================================================ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. 
================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/common.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import os

from ..core import logger
from .plt_plotter import Matplotlib3DPlotter
from .skeleton_plotter_tasks import Draw3DSkeletonMotion, Draw3DSkeletonState


def plot_skeleton_state(skeleton_state, task_name=""):
    """
    Visualize a skeleton state

    :param skeleton_state:
    :param task_name:
    :type skeleton_state: SkeletonState
    :type task_name: string, optional
    """
    logger.info("plotting {}".format(task_name))
    task = Draw3DSkeletonState(task_name=task_name, skeleton_state=skeleton_state)
    plotter = Matplotlib3DPlotter(task)
    plotter.show()


def plot_skeleton_states(skeleton_state, skip_n=1, task_name=""):
    """
    Visualize a sequence of skeleton state. The dimension of the skeleton state
    must be 1.

    :param skeleton_state: one-dimensional (time-indexed) batch of states
    :param skip_n: render every `skip_n`-th frame
    :param task_name:
    :type skeleton_state: SkeletonState
    :type task_name: string, optional
    """
    logger.info("plotting {} motion".format(task_name))
    assert len(skeleton_state.shape) == 1, "the state must have only one dimension"
    task = Draw3DSkeletonState(task_name=task_name, skeleton_state=skeleton_state[0])
    plotter = Matplotlib3DPlotter(task)
    for frame_id in range(skeleton_state.shape[0]):
        if frame_id % skip_n != 0:
            continue
        task.update(skeleton_state[frame_id])
        plotter.update()
    plotter.show()


def plot_skeleton_motion(skeleton_motion, skip_n=1, task_name=""):
    """
    Visualize a skeleton motion along its first dimension.

    :param skeleton_motion:
    :param skip_n: render every `skip_n`-th frame
    :param task_name:
    :type skeleton_motion: SkeletonMotion
    :type task_name: string, optional
    """
    logger.info("plotting {} motion".format(task_name))
    task = Draw3DSkeletonMotion(
        task_name=task_name, skeleton_motion=skeleton_motion, frame_index=0
    )
    plotter = Matplotlib3DPlotter(task)
    for frame_id in range(len(skeleton_motion)):
        if frame_id % skip_n != 0:
            continue
        task.update(frame_id)
        plotter.update()
    plotter.show()


def plot_skeleton_motion_interactive_base(skeleton_motion, task_name=""):
    # Generator driving the interactive viewer; yields the live PlotParams once
    # per rendered frame so callers can inspect/synchronize playback state.

    class PlotParams:
        def __init__(self, total_num_frames):
            self.current_frame = 0
            self.playing = False
            self.looping = False
            self.confirmed = False
            self.playback_speed = 4
            self.total_num_frames = total_num_frames

        def sync(self, other):
            self.current_frame = other.current_frame
            self.playing = other.playing
            # BUGFIX: was `self.looping = other.current_frame`, which clobbered
            # the boolean looping flag with a frame index.
            self.looping = other.looping
            self.confirmed = other.confirmed
            self.playback_speed = other.playback_speed
            self.total_num_frames = other.total_num_frames

    task = Draw3DSkeletonMotion(
        task_name=task_name, skeleton_motion=skeleton_motion, frame_index=0
    )
    plotter = Matplotlib3DPlotter(task)

    plot_params = PlotParams(total_num_frames=len(skeleton_motion))
    print("Entered interactive plot - press 'n' to quit, 'h' for a list of commands")

    def press(event):
        if event.key == "x":
            plot_params.playing = not plot_params.playing
        elif event.key == "z":
            plot_params.current_frame = plot_params.current_frame - 1
        elif event.key == "c":
            plot_params.current_frame = plot_params.current_frame + 1
        elif event.key == "a":
            plot_params.current_frame = plot_params.current_frame - 20
        elif event.key == "d":
            plot_params.current_frame = plot_params.current_frame + 20
        elif event.key == "w":
            plot_params.looping = not plot_params.looping
            print("Looping: {}".format(plot_params.looping))
        elif event.key == "v":
            plot_params.playback_speed *= 2
            print("playback speed: {}".format(plot_params.playback_speed))
        elif event.key == "b":
            if plot_params.playback_speed != 1:
                plot_params.playback_speed //= 2
                print("playback speed: {}".format(plot_params.playback_speed))
        elif event.key == "n":
            plot_params.confirmed = True
        elif event.key == "h":
            rows, columns = os.popen("stty size", "r").read().split()
            columns = int(columns)
            print("=" * columns)
            print("x: play/pause")
            print("z: previous frame")
            print("c: next frame")
            # BUGFIX: help said "10 frames" while 'a'/'d' actually jump 20.
            print("a: jump 20 frames back")
            print("d: jump 20 frames forward")
            print("w: looping/non-looping")
            print("v: double speed (this can be applied multiple times)")
            print("b: half speed (this can be applied multiple times)")
            print("n: quit")
            print("h: help")
            print("=" * columns)

        print(
            'current frame index: {}/{} (press "n" to quit)'.format(
                plot_params.current_frame, plot_params.total_num_frames - 1
            )
        )

    plotter.fig.canvas.mpl_connect("key_press_event", press)
    while True:
        reset_trail = False
        if plot_params.confirmed:
            break
        if plot_params.playing:
            plot_params.current_frame += plot_params.playback_speed
        # Clamp or wrap the frame index depending on the looping flag; wrapping
        # resets the motion trail so it does not smear across the jump.
        if plot_params.current_frame >= plot_params.total_num_frames:
            if plot_params.looping:
                plot_params.current_frame %= plot_params.total_num_frames
                reset_trail = True
            else:
                plot_params.current_frame = plot_params.total_num_frames - 1
        if plot_params.current_frame < 0:
            if plot_params.looping:
                plot_params.current_frame %= plot_params.total_num_frames
                reset_trail = True
            else:
                plot_params.current_frame = 0
        yield plot_params
        task.update(plot_params.current_frame, reset_trail)
        plotter.update()


def plot_skeleton_motion_interactive(skeleton_motion, task_name=""):
    """
    Visualize a skeleton motion along its first dimension interactively.

    :param skeleton_motion:
    :param task_name:
    :type skeleton_motion: SkeletonMotion
    :type task_name: string, optional
    """
    for _ in plot_skeleton_motion_interactive_base(skeleton_motion, task_name):
        pass


def plot_skeleton_motion_interactive_multiple(*callables, sync=True):
    # Drive several interactive-plot generators in lockstep; with `sync` on, the
    # playback state of each window is copied from its left neighbour.
    for _ in zip(*callables):
        if sync:
            for p1, p2 in zip(_[:-1], _[1:]):
                p2.sync(p1)


# def plot_skeleton_motion_interactive_multiple_same(skeleton_motions, task_name=""):


# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/core.py
# ================================================

# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
The base abstract classes for plotter and the plotting tasks. It describes how the
plotter deals with the tasks in the general cases
"""
from typing import List


class BasePlotterTask(object):
    """A named, typed unit of plotting work.

    The type tag identifies which plotter callable should create/update the
    artists for this task; the name is the unique key for caching them.
    """

    _task_name: str  # unique name of the task
    _task_type: str  # type of the task is used to identify which callable

    def __init__(self, task_name: str, task_type: str) -> None:
        self._task_name = task_name
        self._task_type = task_type

    @property
    def task_name(self):
        return self._task_name

    @property
    def task_type(self):
        return self._task_type

    def get_scoped_name(self, name):
        # Child names are namespaced under this task's own name.
        return "/".join((self._task_name, name))

    def __iter__(self):
        """Subclasses must yield the primitive tasks that make up this task."""
        raise NotImplementedError


class BasePlotterTasks(object):
    """A flat iterable view over the primitives of several tasks."""

    def __init__(self, tasks) -> None:
        self._tasks = tasks

    def __iter__(self):
        for each in self._tasks:
            yield from each


class BasePlotter(object):
    """An abstract plotter which deals with a plotting task. The children class
    needs to implement the functions to create/update the objects according to
    the task given.
    """

    _task_primitives: List[BasePlotterTask]

    def __init__(self, task: BasePlotterTask) -> None:
        self._task_primitives = []
        self.create(task)

    @property
    def task_primitives(self):
        return self._task_primitives

    def create(self, task: BasePlotterTask) -> None:
        """Expand `task` into primitives, remember them, and draw them."""
        primitives = [primitive for primitive in task]
        self._task_primitives.extend(primitives)
        self._create_impl(primitives)

    def update(self) -> None:
        """Redraw every primitive registered so far."""
        self._update_impl(self._task_primitives)

    def _update_impl(self, task_list: List[BasePlotterTask]) -> None:
        raise NotImplementedError

    def _create_impl(self, task_list: List[BasePlotterTask]) -> None:
        raise NotImplementedError


# ================================================
# FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/plt_plotter.py
# ================================================

# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
The matplotlib plotter implementation for all the primitive tasks (in our case: lines and
dots)
"""
from typing import Any, Callable, Dict, List

import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as p3
import numpy as np

from .core import BasePlotter, BasePlotterTask


class Matplotlib2DPlotter(BasePlotter):
    """Matplotlib backend for 2D plotter tasks.

    Dispatches on each task primitive's ``task_type`` string ("Draw2DLines",
    "Draw2DDots", "Draw2DTrail") to a create/update callable, and caches the
    matplotlib artists per task name so updates mutate artists in place
    instead of re-plotting.
    """

    _fig: plt.figure  # plt figure
    _ax: plt.axis  # plt axis
    # stores artist objects for each task (task name as the key)
    _artist_cache: Dict[str, Any]
    # callables for each task primitives
    _create_impl_callables: Dict[str, Callable]
    _update_impl_callables: Dict[str, Callable]

    def __init__(self, task: "BasePlotterTask") -> None:
        fig, ax = plt.subplots()
        self._fig = fig
        self._ax = ax
        self._artist_cache = {}
        self._create_impl_callables = {
            "Draw2DLines": self._lines_create_impl,
            "Draw2DDots": self._dots_create_impl,
            "Draw2DTrail": self._trail_create_impl,
        }
        self._update_impl_callables = {
            "Draw2DLines": self._lines_update_impl,
            "Draw2DDots": self._dots_update_impl,
            "Draw2DTrail": self._trail_update_impl,
        }
        self._init_lim()
        # BasePlotter.__init__ calls self.create(task), which in turn calls
        # _create_impl -> the callables above, so the tables must exist first.
        super().__init__(task)

    @property
    def ax(self):
        return self._ax

    @property
    def fig(self):
        return self._fig

    def show(self):
        plt.show()

    def _min(self, x, y):
        # None-tolerant min: treats None as "no value yet".
        if x is None:
            return y
        if y is None:
            return x
        return min(x, y)

    def _max(self, x, y):
        # None-tolerant max: treats None as "no value yet".
        if x is None:
            return y
        if y is None:
            return x
        return max(x, y)

    def _init_lim(self):
        # Reset the running bounding box accumulated by _update_lim.
        self._curr_x_min = None
        self._curr_y_min = None
        self._curr_x_max = None
        self._curr_y_max = None

    def _update_lim(self, xs, ys):
        # Grow the running bounding box to cover the given coordinates.
        self._curr_x_min = self._min(np.min(xs), self._curr_x_min)
        self._curr_y_min = self._min(np.min(ys), self._curr_y_min)
        self._curr_x_max = self._max(np.max(xs), self._curr_x_max)
        self._curr_y_max = self._max(np.max(ys), self._curr_y_max)

    def _set_lim(self):
        # Apply the accumulated bounding box to the axes (only when every
        # bound has been observed), then reset the accumulator for the next
        # frame.
        if not (
            self._curr_x_min is None
            or self._curr_x_max is None
            or self._curr_y_min is None
            or self._curr_y_max is None
        ):
            self._ax.set_xlim(self._curr_x_min, self._curr_x_max)
            self._ax.set_ylim(self._curr_y_min, self._curr_y_max)
            self._init_lim()

    @staticmethod
    def _lines_extract_xy_impl(index, lines_task):
        # lines_task indexes as [line, endpoint, coord]; returns (xs, ys) of
        # the two endpoints of line `index`.
        return lines_task[index, :, 0], lines_task[index, :, 1]

    @staticmethod
    def _trail_extract_xy_impl(index, trail_task):
        # Consecutive-point pair (index, index+1) of the trail as (xs, ys).
        return (trail_task[index : index + 2, 0], trail_task[index : index + 2, 1])

    def _lines_create_impl(self, lines_task):
        color = lines_task.color
        # One Line2D artist per line segment, cached under the task name.
        self._artist_cache[lines_task.task_name] = [
            self._ax.plot(
                *Matplotlib2DPlotter._lines_extract_xy_impl(i, lines_task),
                color=color,
                linewidth=lines_task.line_width,
                alpha=lines_task.alpha
            )[0]
            for i in range(len(lines_task))
        ]

    def _lines_update_impl(self, lines_task):
        lines_artists = self._artist_cache[lines_task.task_name]
        for i in range(len(lines_task)):
            artist = lines_artists[i]
            xs, ys = Matplotlib2DPlotter._lines_extract_xy_impl(i, lines_task)
            artist.set_data(xs, ys)
            if lines_task.influence_lim:
                self._update_lim(xs, ys)

    def _dots_create_impl(self, dots_task):
        color = dots_task.color
        # All dots share a single marker-only Line2D artist.
        self._artist_cache[dots_task.task_name] = self._ax.plot(
            dots_task[:, 0],
            dots_task[:, 1],
            c=color,
            linestyle="",
            marker=".",
            markersize=dots_task.marker_size,
            alpha=dots_task.alpha,
        )[0]

    def _dots_update_impl(self, dots_task):
        dots_artist = self._artist_cache[dots_task.task_name]
        dots_artist.set_data(dots_task[:, 0], dots_task[:, 1])
        if dots_task.influence_lim:
            self._update_lim(dots_task[:, 0], dots_task[:, 1])

    def _trail_create_impl(self, trail_task):
        # NOTE(review): local `color` is unused; the artists read
        # trail_task.color directly.
        color = trail_task.color
        trail_length = len(trail_task) - 1
        # One segment per consecutive point pair, fading out with age
        # (alpha reaches 0 for the oldest segment).
        # NOTE(review): divides by (trail_length - 1) — a trail of exactly
        # 2 points would divide by zero; verify callers always pass longer
        # trails.
        self._artist_cache[trail_task.task_name] = [
            self._ax.plot(
                *Matplotlib2DPlotter._trail_extract_xy_impl(i, trail_task),
                color=trail_task.color,
                linewidth=trail_task.line_width,
                alpha=trail_task.alpha * (1.0 - i / (trail_length - 1))
            )[0]
            for i in range(trail_length)
        ]

    def _trail_update_impl(self, trail_task):
        trails_artists = self._artist_cache[trail_task.task_name]
        for i in range(len(trail_task) - 1):
            artist = trails_artists[i]
            xs, ys = Matplotlib2DPlotter._trail_extract_xy_impl(i, trail_task)
            artist.set_data(xs, ys)
            if trail_task.influence_lim:
                self._update_lim(xs, ys)

    def _create_impl(self, task_list):
        for task in task_list:
            self._create_impl_callables[task.task_type](task)
        self._draw()

    def _update_impl(self, task_list):
        for task in task_list:
            self._update_impl_callables[task.task_type](task)
        self._draw()

    def _set_aspect_equal_2d(self, zero_centered=True):
        # Force a square view box; when zero_centered, the box is centered
        # at the origin instead of the current data mean.
        xlim = self._ax.get_xlim()
        ylim = self._ax.get_ylim()
        if not zero_centered:
            xmean = np.mean(xlim)
            ymean = np.mean(ylim)
        else:
            xmean = 0
            ymean = 0
        plot_radius = max(
            [
                abs(lim - mean_)
                for lims, mean_ in ((xlim, xmean), (ylim, ymean))
                for lim in lims
            ]
        )
        self._ax.set_xlim([xmean - plot_radius, xmean + plot_radius])
        self._ax.set_ylim([ymean - plot_radius, ymean + plot_radius])

    def _draw(self):
        self._set_lim()
        self._set_aspect_equal_2d()
        self._fig.canvas.draw()
        self._fig.canvas.flush_events()
        # Tiny pause lets the GUI event loop process the redraw.
        plt.pause(0.00001)


class Matplotlib3DPlotter(BasePlotter):
    """Matplotlib backend for 3D plotter tasks.

    Same dispatch scheme as Matplotlib2DPlotter, but for "Draw3DLines",
    "Draw3DDots" and "Draw3DTrail" primitives on an Axes3D.
    """

    _fig: plt.figure  # plt figure
    _ax: p3.Axes3D  # plt 3d axis
    # stores artist objects for each task (task name as the key)
    _artist_cache: Dict[str, Any]
    # callables for each task primitives
    _create_impl_callables: Dict[str, Callable]
    _update_impl_callables: Dict[str, Callable]

    def __init__(self, task: "BasePlotterTask") -> None:
        self._fig = plt.figure()
        # NOTE(review): direct Axes3D(fig) construction is deprecated in
        # newer matplotlib (3.4+); fig.add_subplot(projection="3d") is the
        # modern equivalent — confirm against the pinned matplotlib version.
        self._ax = p3.Axes3D(self._fig)
        self._artist_cache = {}
        self._create_impl_callables = {
            "Draw3DLines": self._lines_create_impl,
            "Draw3DDots": self._dots_create_impl,
            "Draw3DTrail": self._trail_create_impl,
        }
        self._update_impl_callables = {
            "Draw3DLines": self._lines_update_impl,
            "Draw3DDots": self._dots_update_impl,
            "Draw3DTrail": self._trail_update_impl,
        }
        self._init_lim()
        # Must come last: BasePlotter.__init__ triggers create() which uses
        # the dispatch tables above.
        super().__init__(task)

    @property
    def ax(self):
        return self._ax

    @property
    def fig(self):
        return self._fig

    def show(self):
        plt.show()

    def _min(self, x, y):
        # None-tolerant min: treats None as "no value yet".
        if x is None:
            return y
        if y is None:
            return x
        return min(x, y)

    def _max(self, x, y):
        # None-tolerant max: treats None as "no value yet".
        if x is None:
            return y
        if y is None:
            return x
        return max(x, y)

    def _init_lim(self):
        # Reset the running 3D bounding box accumulated by _update_lim.
        self._curr_x_min = None
        self._curr_y_min = None
        self._curr_z_min = None
        self._curr_x_max = None
        self._curr_y_max = None
        self._curr_z_max = None

    def _update_lim(self, xs, ys, zs):
        # Grow the running 3D bounding box to cover the given coordinates.
        self._curr_x_min = self._min(np.min(xs), self._curr_x_min)
        self._curr_y_min = self._min(np.min(ys), self._curr_y_min)
        self._curr_z_min = self._min(np.min(zs), self._curr_z_min)
        self._curr_x_max = self._max(np.max(xs), self._curr_x_max)
        self._curr_y_max = self._max(np.max(ys), self._curr_y_max)
        self._curr_z_max = self._max(np.max(zs), self._curr_z_max)

    def _set_lim(self):
        # Apply the accumulated box (only when complete), then reset it.
        if not (
            self._curr_x_min is None
            or self._curr_x_max is None
            or self._curr_y_min is None
            or self._curr_y_max is None
            or self._curr_z_min is None
            or self._curr_z_max is None
        ):
            self._ax.set_xlim3d(self._curr_x_min, self._curr_x_max)
            self._ax.set_ylim3d(self._curr_y_min, self._curr_y_max)
            self._ax.set_zlim3d(self._curr_z_min, self._curr_z_max)
            self._init_lim()

    @staticmethod
    def _lines_extract_xyz_impl(index, lines_task):
        # lines_task indexes as [line, endpoint, coord]; returns
        # (xs, ys, zs) of the two endpoints of line `index`.
        return lines_task[index, :, 0], lines_task[index, :, 1], lines_task[index, :, 2]

    @staticmethod
    def _trail_extract_xyz_impl(index, trail_task):
        # Consecutive-point pair (index, index+1) of the trail.
        return (
            trail_task[index : index + 2, 0],
            trail_task[index : index + 2, 1],
            trail_task[index : index + 2, 2],
        )

    def _lines_create_impl(self, lines_task):
        color = lines_task.color
        # One Line3D artist per line segment, cached under the task name.
        self._artist_cache[lines_task.task_name] = [
            self._ax.plot(
                *Matplotlib3DPlotter._lines_extract_xyz_impl(i, lines_task),
                color=color,
                linewidth=lines_task.line_width,
                alpha=lines_task.alpha
            )[0]
            for i in range(len(lines_task))
        ]

    def _lines_update_impl(self, lines_task):
        lines_artists = self._artist_cache[lines_task.task_name]
        for i in range(len(lines_task)):
            artist = lines_artists[i]
            xs, ys, zs = Matplotlib3DPlotter._lines_extract_xyz_impl(i, lines_task)
            # Line3D has no 3D set_data; x/y go through set_data and z
            # through set_3d_properties.
            artist.set_data(xs, ys)
            artist.set_3d_properties(zs)
            if lines_task.influence_lim:
                self._update_lim(xs, ys, zs)

    def _dots_create_impl(self, dots_task):
        color = dots_task.color
        # All dots share a single marker-only artist.
        self._artist_cache[dots_task.task_name] = self._ax.plot(
            dots_task[:, 0],
            dots_task[:, 1],
            dots_task[:, 2],
            c=color,
            linestyle="",
            marker=".",
            markersize=dots_task.marker_size,
            alpha=dots_task.alpha,
        )[0]

    def _dots_update_impl(self, dots_task):
        dots_artist = self._artist_cache[dots_task.task_name]
        dots_artist.set_data(dots_task[:, 0], dots_task[:, 1])
        dots_artist.set_3d_properties(dots_task[:, 2])
        if dots_task.influence_lim:
            self._update_lim(dots_task[:, 0], dots_task[:, 1], dots_task[:, 2])

    def _trail_create_impl(self, trail_task):
        # NOTE(review): local `color` is unused; the artists read
        # trail_task.color directly.
        color = trail_task.color
        trail_length = len(trail_task) - 1
        # One segment per consecutive point pair, fading out with age.
        # NOTE(review): divides by (trail_length - 1) — a 2-point trail
        # would divide by zero; verify callers always pass longer trails.
        self._artist_cache[trail_task.task_name] = [
            self._ax.plot(
                *Matplotlib3DPlotter._trail_extract_xyz_impl(i, trail_task),
                color=trail_task.color,
                linewidth=trail_task.line_width,
                alpha=trail_task.alpha * (1.0 - i / (trail_length - 1))
            )[0]
            for i in range(trail_length)
        ]

    def _trail_update_impl(self, trail_task):
        trails_artists = self._artist_cache[trail_task.task_name]
        for i in range(len(trail_task) - 1):
            artist = trails_artists[i]
            xs, ys, zs = Matplotlib3DPlotter._trail_extract_xyz_impl(i, trail_task)
            artist.set_data(xs, ys)
            artist.set_3d_properties(zs)
            if trail_task.influence_lim:
                self._update_lim(xs, ys, zs)

    def _create_impl(self, task_list):
        for task in task_list:
            self._create_impl_callables[task.task_type](task)
        self._draw()

    def _update_impl(self, task_list):
        for task in task_list:
            self._update_impl_callables[task.task_type](task)
        self._draw()

    def _set_aspect_equal_3d(self):
        # Force a cubic view box centered at the current data mean so the
        # three axes share one scale (Axes3D has no native equal-aspect).
        xlim = self._ax.get_xlim3d()
        ylim = self._ax.get_ylim3d()
        zlim = self._ax.get_zlim3d()
        xmean = np.mean(xlim)
        ymean = np.mean(ylim)
        zmean = np.mean(zlim)
        plot_radius = max(
            [
                abs(lim - mean_)
                for lims, mean_ in ((xlim, xmean), (ylim, ymean), (zlim, zmean))
                for lim in lims
            ]
        )
        self._ax.set_xlim3d([xmean - plot_radius, xmean + plot_radius])
        self._ax.set_ylim3d([ymean - plot_radius, ymean + plot_radius])
        self._ax.set_zlim3d([zmean - plot_radius, zmean + plot_radius])

    def _draw(self):
        self._set_lim()
        self._set_aspect_equal_3d()
        self._fig.canvas.draw()
        self._fig.canvas.flush_events()
        # Tiny pause lets the GUI event loop process the redraw.
        plt.pause(0.00001)


================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/simple_plotter_tasks.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
This is where all the task primitives are defined
"""
import numpy as np

from .core import BasePlotterTask


class DrawXDLines(BasePlotterTask):
    """Leaf plotter task holding a batch of line segments.

    Data shape is (num_lines, 2, dim); subclasses fix ``dim`` (2 or 3).
    The task_type reported to the plotter is the concrete subclass name
    (e.g. "Draw2DLines"), which is what the plotter dispatch tables key on.
    """

    _lines: np.ndarray  # (num_lines, 2, dim) segment endpoints
    _color: str  # matplotlib color spec
    _line_width: int
    _alpha: float
    _influence_lim: bool  # whether these lines grow the plot's bounding box

    def __init__(
        self,
        task_name: str,
        lines: np.ndarray,
        color: str = "blue",
        line_width: int = 2,
        alpha: float = 1.0,
        influence_lim: bool = True,
    ) -> None:
        super().__init__(task_name=task_name, task_type=self.__class__.__name__)
        self._color = color
        self._line_width = line_width
        self._alpha = alpha
        self._influence_lim = influence_lim
        self.update(lines)

    @property
    def influence_lim(self) -> bool:
        return self._influence_lim

    @property
    def raw_data(self):
        return self._lines

    @property
    def color(self):
        return self._color

    @property
    def line_width(self):
        return self._line_width

    @property
    def alpha(self):
        return self._alpha

    @property
    def dim(self):
        # Subclasses return 2 or 3.
        raise NotImplementedError

    @property
    def name(self):
        return "{}DLines".format(self.dim)

    def update(self, lines):
        """Replace the stored segments; validates the (N, 2, dim) shape."""
        self._lines = np.array(lines)
        shape = self._lines.shape
        assert shape[-1] == self.dim and shape[-2] == 2 and len(shape) == 3

    def __getitem__(self, index):
        return self._lines[index]

    def __len__(self):
        return self._lines.shape[0]

    def __iter__(self):
        # Leaf task: iterating yields only itself (composite tasks yield
        # all their leaves).
        yield self


class DrawXDDots(BasePlotterTask):
    """Leaf plotter task holding a batch of points.

    Data shape is (num_dots, dim); subclasses fix ``dim`` (2 or 3).
    """

    _dots: np.ndarray  # (num_dots, dim) point coordinates
    _color: str  # matplotlib color spec
    _marker_size: int
    _alpha: float
    _influence_lim: bool  # whether these dots grow the plot's bounding box

    def __init__(
        self,
        task_name: str,
        dots: np.ndarray,
        color: str = "blue",
        marker_size: int = 10,
        alpha: float = 1.0,
        influence_lim: bool = True,
    ) -> None:
        super().__init__(task_name=task_name, task_type=self.__class__.__name__)
        self._color = color
        self._marker_size = marker_size
        self._alpha = alpha
        self._influence_lim = influence_lim
        self.update(dots)

    def update(self, dots):
        """Replace the stored points; validates the (N, dim) shape."""
        self._dots = np.array(dots)
        shape = self._dots.shape
        assert shape[-1] == self.dim and len(shape) == 2

    def __getitem__(self, index):
        return self._dots[index]

    def __len__(self):
        return self._dots.shape[0]

    def __iter__(self):
        # Leaf task: iterating yields only itself.
        yield self

    @property
    def influence_lim(self) -> bool:
        return self._influence_lim

    @property
    def raw_data(self):
        return self._dots

    @property
    def color(self):
        return self._color

    @property
    def marker_size(self):
        return self._marker_size

    @property
    def alpha(self):
        return self._alpha

    @property
    def dim(self):
        # Subclasses return 2 or 3.
        raise NotImplementedError

    @property
    def name(self):
        return "{}DDots".format(self.dim)


class DrawXDTrail(DrawXDDots):
    """Dots task rendered as a fading trail; points are ordered newest first."""

    @property
    def line_width(self):
        # Trails reuse the marker size as the segment line width.
        return self.marker_size

    @property
    def name(self):
        return "{}DTrail".format(self.dim)


class Draw2DLines(DrawXDLines):
    @property
    def dim(self):
        return 2


class Draw3DLines(DrawXDLines):
    @property
    def dim(self):
        return 3


class Draw2DDots(DrawXDDots):
    @property
    def dim(self):
        return 2


class Draw3DDots(DrawXDDots):
    @property
    def dim(self):
        return 3


class Draw2DTrail(DrawXDTrail):
    @property
    def dim(self):
        return 2


class Draw3DTrail(DrawXDTrail):
    @property
    def dim(self):
        return 3


================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/skeleton_plotter_tasks.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2.
Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
""" This is where all skeleton related complex tasks are defined (skeleton state and skeleton motion) """ import numpy as np from .core import BasePlotterTask from .simple_plotter_tasks import Draw3DDots, Draw3DLines, Draw3DTrail class Draw3DSkeletonState(BasePlotterTask): _lines_task: Draw3DLines # sub-task for drawing lines _dots_task: Draw3DDots # sub-task for drawing dots def __init__( self, task_name: str, skeleton_state, joints_color: str = "red", lines_color: str = "blue", alpha=1.0, ) -> None: super().__init__(task_name=task_name, task_type="3DSkeletonState") lines, dots = Draw3DSkeletonState._get_lines_and_dots(skeleton_state) self._lines_task = Draw3DLines( self.get_scoped_name("bodies"), lines, joints_color, alpha=alpha ) self._dots_task = Draw3DDots( self.get_scoped_name("joints"), dots, lines_color, alpha=alpha ) @property def name(self): return "3DSkeleton" def update(self, skeleton_state) -> None: self._update(*Draw3DSkeletonState._get_lines_and_dots(skeleton_state)) @staticmethod def _get_lines_and_dots(skeleton_state): """Get all the lines and dots needed to draw the skeleton state """ assert ( len(skeleton_state.tensor.shape) == 1 ), "the state has to be zero dimensional" dots = skeleton_state.global_translation.numpy() skeleton_tree = skeleton_state.skeleton_tree parent_indices = skeleton_tree.parent_indices.numpy() lines = [] for node_index in range(len(skeleton_tree)): parent_index = parent_indices[node_index] if parent_index != -1: lines.append([dots[node_index], dots[parent_index]]) lines = np.array(lines) return lines, dots def _update(self, lines, dots) -> None: self._lines_task.update(lines) self._dots_task.update(dots) def __iter__(self): yield from self._lines_task yield from self._dots_task class Draw3DSkeletonMotion(BasePlotterTask): def __init__( self, task_name: str, skeleton_motion, frame_index=None, joints_color="red", lines_color="blue", velocity_color="green", angular_velocity_color="purple", trail_color="black", trail_length=10, 
alpha=1.0, ) -> None: super().__init__(task_name=task_name, task_type="3DSkeletonMotion") self._trail_length = trail_length self._skeleton_motion = skeleton_motion # if frame_index is None: curr_skeleton_motion = self._skeleton_motion.clone() if frame_index is not None: curr_skeleton_motion.tensor = self._skeleton_motion.tensor[frame_index, :] # else: # curr_skeleton_motion = self._skeleton_motion[frame_index, :] self._skeleton_state_task = Draw3DSkeletonState( self.get_scoped_name("skeleton_state"), curr_skeleton_motion, joints_color=joints_color, lines_color=lines_color, alpha=alpha, ) vel_lines, avel_lines = Draw3DSkeletonMotion._get_vel_and_avel( curr_skeleton_motion ) self._com_pos = curr_skeleton_motion.root_translation.numpy()[ np.newaxis, ... ].repeat(trail_length, axis=0) self._vel_task = Draw3DLines( self.get_scoped_name("velocity"), vel_lines, velocity_color, influence_lim=False, alpha=alpha, ) self._avel_task = Draw3DLines( self.get_scoped_name("angular_velocity"), avel_lines, angular_velocity_color, influence_lim=False, alpha=alpha, ) self._com_trail_task = Draw3DTrail( self.get_scoped_name("com_trail"), self._com_pos, trail_color, marker_size=2, influence_lim=True, alpha=alpha, ) @property def name(self): return "3DSkeletonMotion" def update(self, frame_index=None, reset_trail=False, skeleton_motion=None) -> None: if skeleton_motion is not None: self._skeleton_motion = skeleton_motion curr_skeleton_motion = self._skeleton_motion.clone() if frame_index is not None: curr_skeleton_motion.tensor = curr_skeleton_motion.tensor[frame_index, :] if reset_trail: self._com_pos = curr_skeleton_motion.root_translation.numpy()[ np.newaxis, ... 
].repeat(self._trail_length, axis=0) else: self._com_pos = np.concatenate( ( curr_skeleton_motion.root_translation.numpy()[np.newaxis, ...], self._com_pos[:-1], ), axis=0, ) self._skeleton_state_task.update(curr_skeleton_motion) self._com_trail_task.update(self._com_pos) self._update(*Draw3DSkeletonMotion._get_vel_and_avel(curr_skeleton_motion)) @staticmethod def _get_vel_and_avel(skeleton_motion): """Get all the velocity and angular velocity lines """ pos = skeleton_motion.global_translation.numpy() vel = skeleton_motion.global_velocity.numpy() avel = skeleton_motion.global_angular_velocity.numpy() vel_lines = np.stack((pos, pos + vel * 0.02), axis=1) avel_lines = np.stack((pos, pos + avel * 0.01), axis=1) return vel_lines, avel_lines def _update(self, vel_lines, avel_lines) -> None: self._vel_task.update(vel_lines) self._avel_task.update(avel_lines) def __iter__(self): yield from self._skeleton_state_task yield from self._vel_task yield from self._avel_task yield from self._com_trail_task class Draw3DSkeletonMotions(BasePlotterTask): def __init__(self, skeleton_motion_tasks) -> None: self._skeleton_motion_tasks = skeleton_motion_tasks @property def name(self): return "3DSkeletonMotions" def update(self, frame_index) -> None: list(map(lambda x: x.update(frame_index), self._skeleton_motion_tasks)) def __iter__(self): yield from self._skeleton_state_tasks ================================================ FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/tests/__init__.py ================================================ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. 
================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/poselib/visualization/tests/test_plotter.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

# Manual/visual smoke test: builds two line segments and four dots, groups
# them into one composite task, and opens an interactive 3D plot window.
from typing import cast

import matplotlib.pyplot as plt
import numpy as np

from ..core import BasePlotterTask, BasePlotterTasks
from ..plt_plotter import Matplotlib3DPlotter
from ..simple_plotter_tasks import Draw3DDots, Draw3DLines

# Two vertical segments forming the sides of a unit square in the x=0 plane.
task = Draw3DLines(task_name="test",
                   lines=np.array([[[0, 0, 0], [0, 0, 1]], [[0, 1, 1], [0, 1, 0]]]),
                   color="blue")
# The four corners of the same square.
task2 = Draw3DDots(task_name="test2",
                   dots=np.array([[0, 0, 0], [0, 0, 1], [0, 1, 1], [0, 1, 0]]),
                   color="red")
task3 = BasePlotterTasks([task, task2])
# Matplotlib3DPlotter expects a single BasePlotterTask; the composite is
# cast to satisfy the annotation.
plotter = Matplotlib3DPlotter(cast(BasePlotterTask, task3))
plt.show()


================================================
FILE: timechamber/tasks/ase_humanoid_base/poselib/retarget_motion.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3.
Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from isaacgym.torch_utils import * import torch import json import numpy as np from poselib.core.rotation3d import * from poselib.skeleton.skeleton3d import SkeletonTree, SkeletonState, SkeletonMotion from poselib.visualization.common import plot_skeleton_state, plot_skeleton_motion_interactive """ This scripts shows how to retarget a motion clip from the source skeleton to a target skeleton. Data required for retargeting are stored in a retarget config dictionary as a json file. This file contains: - source_motion: a SkeletonMotion npy format representation of a motion sequence. The motion clip should use the same skeleton as the source T-Pose skeleton. 
- target_motion_path: path to save the retargeted motion to - source_tpose: a SkeletonState npy format representation of the source skeleton in it's T-Pose state - target_tpose: a SkeletonState npy format representation of the target skeleton in it's T-Pose state (pose should match source T-Pose) - joint_mapping: mapping of joint names from source to target - rotation: root rotation offset from source to target skeleton (for transforming across different orientation axes), represented as a quaternion in XYZW order. - scale: scale offset from source to target skeleton """ VISUALIZE = False def project_joints(motion): right_upper_arm_id = motion.skeleton_tree._node_indices["right_upper_arm"] right_lower_arm_id = motion.skeleton_tree._node_indices["right_lower_arm"] right_hand_id = motion.skeleton_tree._node_indices["right_hand"] left_upper_arm_id = motion.skeleton_tree._node_indices["left_upper_arm"] left_lower_arm_id = motion.skeleton_tree._node_indices["left_lower_arm"] left_hand_id = motion.skeleton_tree._node_indices["left_hand"] right_thigh_id = motion.skeleton_tree._node_indices["right_thigh"] right_shin_id = motion.skeleton_tree._node_indices["right_shin"] right_foot_id = motion.skeleton_tree._node_indices["right_foot"] left_thigh_id = motion.skeleton_tree._node_indices["left_thigh"] left_shin_id = motion.skeleton_tree._node_indices["left_shin"] left_foot_id = motion.skeleton_tree._node_indices["left_foot"] device = motion.global_translation.device # right arm right_upper_arm_pos = motion.global_translation[..., right_upper_arm_id, :] right_lower_arm_pos = motion.global_translation[..., right_lower_arm_id, :] right_hand_pos = motion.global_translation[..., right_hand_id, :] right_shoulder_rot = motion.local_rotation[..., right_upper_arm_id, :] right_elbow_rot = motion.local_rotation[..., right_lower_arm_id, :] right_arm_delta0 = right_upper_arm_pos - right_lower_arm_pos right_arm_delta1 = right_hand_pos - right_lower_arm_pos right_arm_delta0 = right_arm_delta0 
/ torch.norm(right_arm_delta0, dim=-1, keepdim=True) right_arm_delta1 = right_arm_delta1 / torch.norm(right_arm_delta1, dim=-1, keepdim=True) right_elbow_dot = torch.sum(-right_arm_delta0 * right_arm_delta1, dim=-1) right_elbow_dot = torch.clamp(right_elbow_dot, -1.0, 1.0) right_elbow_theta = torch.acos(right_elbow_dot) right_elbow_q = quat_from_angle_axis(-torch.abs(right_elbow_theta), torch.tensor(np.array([[0.0, 1.0, 0.0]]), device=device, dtype=torch.float32)) right_elbow_local_dir = motion.skeleton_tree.local_translation[right_hand_id] right_elbow_local_dir = right_elbow_local_dir / torch.norm(right_elbow_local_dir) right_elbow_local_dir_tile = torch.tile(right_elbow_local_dir.unsqueeze(0), [right_elbow_rot.shape[0], 1]) right_elbow_local_dir0 = quat_rotate(right_elbow_rot, right_elbow_local_dir_tile) right_elbow_local_dir1 = quat_rotate(right_elbow_q, right_elbow_local_dir_tile) right_arm_dot = torch.sum(right_elbow_local_dir0 * right_elbow_local_dir1, dim=-1) right_arm_dot = torch.clamp(right_arm_dot, -1.0, 1.0) right_arm_theta = torch.acos(right_arm_dot) right_arm_theta = torch.where(right_elbow_local_dir0[..., 1] <= 0, right_arm_theta, -right_arm_theta) right_arm_q = quat_from_angle_axis(right_arm_theta, right_elbow_local_dir.unsqueeze(0)) right_shoulder_rot = quat_mul(right_shoulder_rot, right_arm_q) # left arm left_upper_arm_pos = motion.global_translation[..., left_upper_arm_id, :] left_lower_arm_pos = motion.global_translation[..., left_lower_arm_id, :] left_hand_pos = motion.global_translation[..., left_hand_id, :] left_shoulder_rot = motion.local_rotation[..., left_upper_arm_id, :] left_elbow_rot = motion.local_rotation[..., left_lower_arm_id, :] left_arm_delta0 = left_upper_arm_pos - left_lower_arm_pos left_arm_delta1 = left_hand_pos - left_lower_arm_pos left_arm_delta0 = left_arm_delta0 / torch.norm(left_arm_delta0, dim=-1, keepdim=True) left_arm_delta1 = left_arm_delta1 / torch.norm(left_arm_delta1, dim=-1, keepdim=True) left_elbow_dot = 
torch.sum(-left_arm_delta0 * left_arm_delta1, dim=-1) left_elbow_dot = torch.clamp(left_elbow_dot, -1.0, 1.0) left_elbow_theta = torch.acos(left_elbow_dot) left_elbow_q = quat_from_angle_axis(-torch.abs(left_elbow_theta), torch.tensor(np.array([[0.0, 1.0, 0.0]]), device=device, dtype=torch.float32)) left_elbow_local_dir = motion.skeleton_tree.local_translation[left_hand_id] left_elbow_local_dir = left_elbow_local_dir / torch.norm(left_elbow_local_dir) left_elbow_local_dir_tile = torch.tile(left_elbow_local_dir.unsqueeze(0), [left_elbow_rot.shape[0], 1]) left_elbow_local_dir0 = quat_rotate(left_elbow_rot, left_elbow_local_dir_tile) left_elbow_local_dir1 = quat_rotate(left_elbow_q, left_elbow_local_dir_tile) left_arm_dot = torch.sum(left_elbow_local_dir0 * left_elbow_local_dir1, dim=-1) left_arm_dot = torch.clamp(left_arm_dot, -1.0, 1.0) left_arm_theta = torch.acos(left_arm_dot) left_arm_theta = torch.where(left_elbow_local_dir0[..., 1] <= 0, left_arm_theta, -left_arm_theta) left_arm_q = quat_from_angle_axis(left_arm_theta, left_elbow_local_dir.unsqueeze(0)) left_shoulder_rot = quat_mul(left_shoulder_rot, left_arm_q) # right leg right_thigh_pos = motion.global_translation[..., right_thigh_id, :] right_shin_pos = motion.global_translation[..., right_shin_id, :] right_foot_pos = motion.global_translation[..., right_foot_id, :] right_hip_rot = motion.local_rotation[..., right_thigh_id, :] right_knee_rot = motion.local_rotation[..., right_shin_id, :] right_leg_delta0 = right_thigh_pos - right_shin_pos right_leg_delta1 = right_foot_pos - right_shin_pos right_leg_delta0 = right_leg_delta0 / torch.norm(right_leg_delta0, dim=-1, keepdim=True) right_leg_delta1 = right_leg_delta1 / torch.norm(right_leg_delta1, dim=-1, keepdim=True) right_knee_dot = torch.sum(-right_leg_delta0 * right_leg_delta1, dim=-1) right_knee_dot = torch.clamp(right_knee_dot, -1.0, 1.0) right_knee_theta = torch.acos(right_knee_dot) right_knee_q = quat_from_angle_axis(torch.abs(right_knee_theta), 
torch.tensor(np.array([[0.0, 1.0, 0.0]]), device=device, dtype=torch.float32)) right_knee_local_dir = motion.skeleton_tree.local_translation[right_foot_id] right_knee_local_dir = right_knee_local_dir / torch.norm(right_knee_local_dir) right_knee_local_dir_tile = torch.tile(right_knee_local_dir.unsqueeze(0), [right_knee_rot.shape[0], 1]) right_knee_local_dir0 = quat_rotate(right_knee_rot, right_knee_local_dir_tile) right_knee_local_dir1 = quat_rotate(right_knee_q, right_knee_local_dir_tile) right_leg_dot = torch.sum(right_knee_local_dir0 * right_knee_local_dir1, dim=-1) right_leg_dot = torch.clamp(right_leg_dot, -1.0, 1.0) right_leg_theta = torch.acos(right_leg_dot) right_leg_theta = torch.where(right_knee_local_dir0[..., 1] >= 0, right_leg_theta, -right_leg_theta) right_leg_q = quat_from_angle_axis(right_leg_theta, right_knee_local_dir.unsqueeze(0)) right_hip_rot = quat_mul(right_hip_rot, right_leg_q) # left leg left_thigh_pos = motion.global_translation[..., left_thigh_id, :] left_shin_pos = motion.global_translation[..., left_shin_id, :] left_foot_pos = motion.global_translation[..., left_foot_id, :] left_hip_rot = motion.local_rotation[..., left_thigh_id, :] left_knee_rot = motion.local_rotation[..., left_shin_id, :] left_leg_delta0 = left_thigh_pos - left_shin_pos left_leg_delta1 = left_foot_pos - left_shin_pos left_leg_delta0 = left_leg_delta0 / torch.norm(left_leg_delta0, dim=-1, keepdim=True) left_leg_delta1 = left_leg_delta1 / torch.norm(left_leg_delta1, dim=-1, keepdim=True) left_knee_dot = torch.sum(-left_leg_delta0 * left_leg_delta1, dim=-1) left_knee_dot = torch.clamp(left_knee_dot, -1.0, 1.0) left_knee_theta = torch.acos(left_knee_dot) left_knee_q = quat_from_angle_axis(torch.abs(left_knee_theta), torch.tensor(np.array([[0.0, 1.0, 0.0]]), device=device, dtype=torch.float32)) left_knee_local_dir = motion.skeleton_tree.local_translation[left_foot_id] left_knee_local_dir = left_knee_local_dir / torch.norm(left_knee_local_dir) left_knee_local_dir_tile = 
torch.tile(left_knee_local_dir.unsqueeze(0), [left_knee_rot.shape[0], 1])
    # Rotate the knee->foot local direction by both the original knee rotation and
    # the projected 1-DoF knee rotation; the angle between them is the residual twist.
    left_knee_local_dir0 = quat_rotate(left_knee_rot, left_knee_local_dir_tile)
    left_knee_local_dir1 = quat_rotate(left_knee_q, left_knee_local_dir_tile)
    left_leg_dot = torch.sum(left_knee_local_dir0 * left_knee_local_dir1, dim=-1)
    left_leg_dot = torch.clamp(left_leg_dot, -1.0, 1.0)  # guard acos against fp drift
    left_leg_theta = torch.acos(left_leg_dot)
    left_leg_theta = torch.where(left_knee_local_dir0[..., 1] >= 0, left_leg_theta, -left_leg_theta)
    left_leg_q = quat_from_angle_axis(left_leg_theta, left_knee_local_dir.unsqueeze(0))
    # Fold the residual twist back into the hip so the pose is preserved.
    left_hip_rot = quat_mul(left_hip_rot, left_leg_q)

    # Write the projected 1-DoF elbow/knee rotations (and compensated
    # shoulder/hip rotations) back into a copy of the local rotations.
    new_local_rotation = motion.local_rotation.clone()
    new_local_rotation[..., right_upper_arm_id, :] = right_shoulder_rot
    new_local_rotation[..., right_lower_arm_id, :] = right_elbow_q
    new_local_rotation[..., left_upper_arm_id, :] = left_shoulder_rot
    new_local_rotation[..., left_lower_arm_id, :] = left_elbow_q
    new_local_rotation[..., right_thigh_id, :] = right_hip_rot
    new_local_rotation[..., right_shin_id, :] = right_knee_q
    new_local_rotation[..., left_thigh_id, :] = left_hip_rot
    new_local_rotation[..., left_shin_id, :] = left_knee_q
    # Hands are zeroed out entirely (identity rotation).
    new_local_rotation[..., left_hand_id, :] = quat_identity([1])
    new_local_rotation[..., right_hand_id, :] = quat_identity([1])

    new_sk_state = SkeletonState.from_rotation_and_root_translation(motion.skeleton_tree, new_local_rotation,
                                                                    motion.root_translation, is_local=True)
    new_motion = SkeletonMotion.from_skeleton_state(new_sk_state, fps=motion.fps)
    return new_motion


def main():
    """Retarget a CMU mocap clip onto the AMP humanoid skeleton.

    Pipeline: load the retarget config, load source/target t-poses and the
    source motion, retarget via ``retarget_to_by_tpose``, trim frames, project
    elbows/knees to 1 DoF, ground the feet, then save (and optionally
    visualize) the result. Paths are relative to the working directory.
    """
    # load retarget config
    retarget_data_path = "data/configs/retarget_cmu_to_amp.json"
    with open(retarget_data_path) as f:
        retarget_data = json.load(f)

    # load and visualize t-pose files
    source_tpose = SkeletonState.from_file(retarget_data["source_tpose"])
    if VISUALIZE:
        plot_skeleton_state(source_tpose)

    target_tpose = SkeletonState.from_file(retarget_data["target_tpose"])
    if VISUALIZE:
        plot_skeleton_state(target_tpose)

    # load and visualize source motion sequence
    source_motion = SkeletonMotion.from_file(retarget_data["source_motion"])
    if VISUALIZE:
        plot_skeleton_motion_interactive(source_motion)

    # parse data from retarget config
    joint_mapping = retarget_data["joint_mapping"]
    rotation_to_target_skeleton = torch.tensor(retarget_data["rotation"])

    # run retargeting
    target_motion = source_motion.retarget_to_by_tpose(
        joint_mapping=retarget_data["joint_mapping"],
        source_tpose=source_tpose,
        target_tpose=target_tpose,
        rotation_to_target_skeleton=rotation_to_target_skeleton,
        scale_to_target_skeleton=retarget_data["scale"]
    )

    # keep frames between [trim_frame_beg, trim_frame_end - 1]
    # (-1 in the config means "no trimming" on that end)
    frame_beg = retarget_data["trim_frame_beg"]
    frame_end = retarget_data["trim_frame_end"]
    if (frame_beg == -1):
        frame_beg = 0

    if (frame_end == -1):
        frame_end = target_motion.local_rotation.shape[0]

    local_rotation = target_motion.local_rotation
    root_translation = target_motion.root_translation
    local_rotation = local_rotation[frame_beg:frame_end, ...]
    root_translation = root_translation[frame_beg:frame_end, ...]

    new_sk_state = SkeletonState.from_rotation_and_root_translation(target_motion.skeleton_tree, local_rotation,
                                                                    root_translation, is_local=True)
    target_motion = SkeletonMotion.from_skeleton_state(new_sk_state, fps=target_motion.fps)

    # need to convert some joints from 3D to 1D (e.g. elbows and knees)
    target_motion = project_joints(target_motion)

    # move the root so that the feet are on the ground
    # (shift the whole clip by the global minimum height over all bodies/frames)
    local_rotation = target_motion.local_rotation
    root_translation = target_motion.root_translation
    tar_global_pos = target_motion.global_translation
    min_h = torch.min(tar_global_pos[..., 2])
    root_translation[:, 2] += -min_h

    # adjust the height of the root to avoid ground penetration
    root_height_offset = retarget_data["root_height_offset"]
    root_translation[:, 2] += root_height_offset

    new_sk_state = SkeletonState.from_rotation_and_root_translation(target_motion.skeleton_tree, local_rotation,
                                                                    root_translation, is_local=True)
    target_motion = SkeletonMotion.from_skeleton_state(new_sk_state, fps=target_motion.fps)

    # save retargeted motion
    target_motion.to_file(retarget_data["target_motion_path"])

    # visualize retargeted motion
    plot_skeleton_motion_interactive(target_motion)

    return


if __name__ == '__main__':
    main()

================================================
FILE: timechamber/tasks/base/__init__.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: timechamber/tasks/base/ma_vec_task.py ================================================ # Copyright (c) 2018-2021, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from typing import Dict, Any, Tuple

import gym
from gym import spaces

from isaacgym import gymtorch, gymapi
from isaacgym.torch_utils import to_torch
from isaacgym.gymutil import get_property_setter_map, get_property_getter_map, get_default_setter_args, \
    apply_random_samples, check_buckets, generate_random_samples

import torch
import numpy as np
import operator, random
from copy import deepcopy

import sys

import abc
from .vec_task import Env


class MA_VecTask(Env):
    """Multi-agent vectorized task base.

    Extends `Env` with Isaac Gym sim creation, a viewer, domain-randomization
    bookkeeping, and per-agent buffers: observation/reward/reset/progress
    buffers are all sized ``num_envs * num_agents`` (see `allocate_buffers`).
    Concrete tasks implement `pre_physics_step` / `post_physics_step` and
    (apparently) `create_sim` / `reset_idx` / `compute_observations` —
    NOTE(review): `__init__` calls `self.create_sim()` with no arguments while
    the `create_sim` defined below takes four, so subclasses are evidently
    expected to override it; confirm against the task classes.
    """

    def __init__(self, config, rl_device, sim_device, graphics_device_id, headless,
                 virtual_screen_capture: bool = False, force_render: bool = False):
        """Initialise the `MA_VecTask`.

        Args:
            config: config dictionary for the environment.
            sim_device: the device to simulate physics on. eg. 'cuda:0' or 'cpu'
            graphics_device_id: the device ID to render with.
            headless: Set to False to disable viewer rendering.
            virtual_screen_capture: stored as-is; not used elsewhere in this class.
            force_render: when True, `step` renders the viewer every physics substep.
        """
        super().__init__(config, rl_device, sim_device, graphics_device_id, headless)
        self.virtual_screen_capture = virtual_screen_capture
        self.force_render = force_render

        # Build SimParams from the task config (dt, substeps, up-axis, physx/flex opts).
        self.sim_params = self.__parse_sim_params(self.cfg["physics_engine"], self.cfg["sim"])
        if self.cfg["physics_engine"] == "physx":
            self.physics_engine = gymapi.SIM_PHYSX
        elif self.cfg["physics_engine"] == "flex":
            self.physics_engine = gymapi.SIM_FLEX
        else:
            msg = f"Invalid physics engine backend: {self.cfg['physics_engine']}"
            raise ValueError(msg)

        # optimization flags for pytorch JIT
        torch._C._jit_set_profiling_mode(False)
        torch._C._jit_set_profiling_executor(False)

        self.gym = gymapi.acquire_gym()

        # Domain-randomization state (see apply_randomizations).
        self.first_randomization = True
        self.original_props = {}
        self.dr_randomizations = {}
        self.actor_params_generator = None
        self.extern_actor_params = {}
        self.last_step = -1
        self.last_rand_step = -1
        for env_id in range(self.num_envs):
            self.extern_actor_params[env_id] = None

        # create envs, sim and viewer
        self.sim_initialized = False
        self.create_sim()
        self.gym.prepare_sim(self.sim)
        self.sim_initialized = True

        self.set_viewer()
        self.allocate_buffers()

        self.obs_dict = {}

    def set_viewer(self):
        """Create the viewer."""

        # todo: read from config
        self.enable_viewer_sync = True
        self.viewer = None

        # if running with a viewer, set up keyboard shortcuts and camera
        if self.headless == False:
            # subscribe to keyboard shortcuts
            self.viewer = self.gym.create_viewer(
                self.sim, gymapi.CameraProperties())
            self.gym.subscribe_viewer_keyboard_event(
                self.viewer, gymapi.KEY_ESCAPE, "QUIT")
            self.gym.subscribe_viewer_keyboard_event(
                self.viewer, gymapi.KEY_V, "toggle_viewer_sync")

            # set the camera position based on up axis
            sim_params = self.gym.get_sim_params(self.sim)
            if sim_params.up_axis == gymapi.UP_AXIS_Z:
                cam_pos = gymapi.Vec3(20.0, 25.0, 3.0)
                cam_target = gymapi.Vec3(10.0, 15.0, 0.0)
            else:
                cam_pos = gymapi.Vec3(20.0, 3.0, 25.0)
                cam_target = gymapi.Vec3(10.0, 0.0, 15.0)

            self.gym.viewer_camera_look_at(
                self.viewer, None, cam_pos,
                cam_target)

    def allocate_buffers(self):
        """Allocate the observation, states, etc. buffers.

        These are what is used to set observations and states in the environment classes which
        inherit from this one, and are read in `step` and other related functions.

        Per-agent buffers (obs/rew/reset/timeout/progress/randomize) are sized
        ``num_envs * num_agents``; the (privileged) state buffer is per-env only.
        """

        # allocate buffers
        self.obs_buf = torch.zeros(
            (self.num_envs * self.num_agents, self.num_obs), device=self.device, dtype=torch.float)
        self.states_buf = torch.zeros(
            (self.num_envs, self.num_states), device=self.device, dtype=torch.float)
        self.rew_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.float)
        # reset_buf starts at ones: every env is reset on the first step
        self.reset_buf = torch.ones(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        self.timeout_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        self.progress_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        self.randomize_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        self.extras = {}

    def set_sim_params_up_axis(self, sim_params: gymapi.SimParams, axis: str) -> int:
        """Set gravity based on up axis and return axis index.

        Args:
            sim_params: sim params to modify the axis for.
            axis: axis to set sim params for.
        Returns:
            axis index for up axis (2 for 'z', otherwise 1; gravity only set for 'z').
        """
        if axis == 'z':
            sim_params.up_axis = gymapi.UP_AXIS_Z
            sim_params.gravity.x = 0
            sim_params.gravity.y = 0
            sim_params.gravity.z = -9.81
            return 2
        return 1

    def create_sim(self, compute_device: int, graphics_device: int, physics_engine, sim_params: gymapi.SimParams):
        """Create an Isaac Gym sim object.

        NOTE(review): `__init__` invokes `self.create_sim()` with no arguments,
        so subclasses presumably override this method — confirm against task classes.

        Args:
            compute_device: ID of compute device to use.
            graphics_device: ID of graphics device to use.
            physics_engine: physics engine to use (`gymapi.SIM_PHYSX` or `gymapi.SIM_FLEX`)
            sim_params: sim params to use.
        Returns:
            the Isaac Gym sim object.
""" sim = self.gym.create_sim(compute_device, graphics_device, physics_engine, sim_params) if sim is None: print("*** Failed to create sim") quit() return sim def get_state(self): """Returns the state buffer of the environment (the priviledged observations for asymmetric training).""" return torch.clamp(self.states_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) @abc.abstractmethod def pre_physics_step(self, actions: torch.Tensor): """Apply the actions to the environment (eg by setting torques, position targets). Args: actions: the actions to apply """ @abc.abstractmethod def post_physics_step(self): """Compute reward and observations, reset any environments that require it.""" def step(self, actions: torch.Tensor) -> Tuple[Dict[str, torch.Tensor], torch.Tensor, torch.Tensor, Dict[str, Any]]: """Step the physics of the environment. Args: actions: actions to apply Returns: Observations, rewards, resets, info Observations are dict of observations (currently only one member called 'obs') """ # randomize actions if self.dr_randomizations.get('actions', None): actions = self.dr_randomizations['actions']['noise_lambda'](actions) # apply actions self.pre_physics_step(actions) # step physics and render each frame for i in range(self.control_freq_inv): if self.force_render: self.render() self.gym.simulate(self.sim) # to fix! if self.device == 'cpu': self.gym.fetch_results(self.sim, True) # fill time out buffer self.timeout_buf = torch.where(self.progress_buf >= self.max_episode_length - 1, torch.ones_like(self.timeout_buf), torch.zeros_like(self.timeout_buf)) # compute observations, rewards, resets, ... self.post_physics_step() # randomize observations if self.dr_randomizations.get('observations', None): self.obs_buf = self.dr_randomizations['observations']['noise_lambda'](self.obs_buf) self.extras["time_outs"] = self.timeout_buf.to(self.rl_device) return def zero_actions(self) -> torch.Tensor: """Returns a buffer with zero actions. 
Returns: A buffer of zero torch actions """ actions = torch.zeros([self.num_envs * self.num_agents, self.num_actions], dtype=torch.float32, device=self.rl_device) return actions def reset(self, env_ids=None) -> torch.Tensor: """Reset the environment. """ if (env_ids is None): # zero_actions = self.zero_actions() # self.step(zero_actions) env_ids = to_torch(np.arange(self.num_envs), device=self.device, dtype=torch.long) self.reset_idx(env_ids) self.compute_observations() self.pos_before = self.obs_buf[:self.num_envs, :2].clone() else: self._reset_envs(env_ids=env_ids) return def _reset_envs(self, env_ids): if (len(env_ids) > 0): self.reset_idx(env_ids) self.compute_observations() self.pos_before = self.obs_buf[:self.num_envs, :2].clone() return def reset_done(self): """Reset the environment. Returns: Observation dictionary, indices of environments being reset """ done_env_ids = self.reset_buf.nonzero(as_tuple=False).flatten() if len(done_env_ids) > 0: self.reset_idx(done_env_ids) self.obs_dict["obs"] = torch.clamp(self.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) # asymmetric actor-critic if self.num_states > 0: self.obs_dict["states"] = self.get_state() return self.obs_dict, done_env_ids def render(self): """Draw the frame to the viewer, and check for keyboard events.""" if self.viewer: # check for window closed if self.gym.query_viewer_has_closed(self.viewer): sys.exit() # check for keyboard events for evt in self.gym.query_viewer_action_events(self.viewer): if evt.action == "QUIT" and evt.value > 0: sys.exit() elif evt.action == "toggle_viewer_sync" and evt.value > 0: self.enable_viewer_sync = not self.enable_viewer_sync # fetch results if self.device != 'cpu': self.gym.fetch_results(self.sim, True) # step graphics if self.enable_viewer_sync: self.gym.step_graphics(self.sim) self.gym.draw_viewer(self.viewer, self.sim, True) # Wait for dt to elapse in real time. # This synchronizes the physics simulation with the rendering rate. 
self.gym.sync_frame_time(self.sim) else: self.gym.poll_viewer_events(self.viewer) def __parse_sim_params(self, physics_engine: str, config_sim: Dict[str, Any]) -> gymapi.SimParams: """Parse the config dictionary for physics stepping settings. Args: physics_engine: which physics engine to use. "physx" or "flex" config_sim: dict of sim configuration parameters Returns IsaacGym SimParams object with updated settings. """ sim_params = gymapi.SimParams() # check correct up-axis if config_sim["up_axis"] not in ["z", "y"]: msg = f"Invalid physics up-axis: {config_sim['up_axis']}" print(msg) raise ValueError(msg) # assign general sim parameters sim_params.dt = config_sim["dt"] sim_params.num_client_threads = config_sim.get("num_client_threads", 0) sim_params.use_gpu_pipeline = config_sim["use_gpu_pipeline"] sim_params.substeps = config_sim.get("substeps", 2) # assign up-axis if config_sim["up_axis"] == "z": sim_params.up_axis = gymapi.UP_AXIS_Z else: sim_params.up_axis = gymapi.UP_AXIS_Y # assign gravity sim_params.gravity = gymapi.Vec3(*config_sim["gravity"]) # configure physics parameters if physics_engine == "physx": # set the parameters if "physx" in config_sim: for opt in config_sim["physx"].keys(): if opt == "contact_collection": setattr(sim_params.physx, opt, gymapi.ContactCollection(config_sim["physx"][opt])) else: setattr(sim_params.physx, opt, config_sim["physx"][opt]) else: # set the parameters if "flex" in config_sim: for opt in config_sim["flex"].keys(): setattr(sim_params.flex, opt, config_sim["flex"][opt]) # return the configured params return sim_params """ Domain Randomization methods """ def get_actor_params_info(self, dr_params: Dict[str, Any], env): """Generate a flat array of actor params, their names and ranges. 
        Returns:
            The array (params, names, lows, highs), or None when the DR config
            has no "actor_params" section. Non-uniform distributions get
            (-Inf, Inf) bounds.
        """
        if "actor_params" not in dr_params:
            return None
        params = []
        names = []
        lows = []
        highs = []
        param_getters_map = get_property_getter_map(self.gym)
        for actor, actor_properties in dr_params["actor_params"].items():
            handle = self.gym.find_actor_handle(env, actor)
            for prop_name, prop_attrs in actor_properties.items():
                if prop_name == 'color':
                    continue  # this is set randomly
                props = param_getters_map[prop_name](env, handle)
                if not isinstance(props, list):
                    props = [props]
                for prop_idx, prop in enumerate(props):
                    for attr, attr_randomization_params in prop_attrs.items():
                        name = prop_name + '_' + str(prop_idx) + '_' + attr
                        lo_hi = attr_randomization_params['range']
                        distr = attr_randomization_params['distribution']
                        if 'uniform' not in distr:
                            lo_hi = (-1.0 * float('Inf'), float('Inf'))
                        if isinstance(prop, np.ndarray):
                            # array-valued attribute: one entry per element
                            for attr_idx in range(prop[attr].shape[0]):
                                params.append(prop[attr][attr_idx])
                                names.append(name + '_' + str(attr_idx))
                                lows.append(lo_hi[0])
                                highs.append(lo_hi[1])
                        else:
                            params.append(getattr(prop, attr))
                            names.append(name)
                            lows.append(lo_hi[0])
                            highs.append(lo_hi[1])
        return params, names, lows, highs

    def apply_randomizations(self, dr_params):
        """Apply domain randomizations to the environment.

        Note that currently we can only apply randomizations only on resets, due to current PhysX limitations

        Args:
            dr_params: parameters for domain randomization to use.
        """
        # If we don't have a randomization frequency, randomize every step
        rand_freq = dr_params.get("frequency", 1)

        # First, determine what to randomize:
        #   - non-environment parameters when > frequency steps have passed since the last non-environment
        #   - physical environments in the reset buffer, which have exceeded the randomization frequency threshold
        #   - on the first call, randomize everything
        self.last_step = self.gym.get_frame_count(self.sim)
        if self.first_randomization:
            do_nonenv_randomize = True
            env_ids = list(range(self.num_envs))
        else:
            do_nonenv_randomize = (self.last_step - self.last_rand_step) >= rand_freq
            rand_envs = torch.where(self.randomize_buf >= rand_freq, torch.ones_like(self.randomize_buf),
                                    torch.zeros_like(self.randomize_buf))
            # only envs that are both due for randomization AND being reset
            rand_envs = torch.logical_and(rand_envs, self.reset_buf)
            env_ids = torch.nonzero(rand_envs, as_tuple=False).squeeze(-1).tolist()
            self.randomize_buf[rand_envs] = 0

        if do_nonenv_randomize:
            self.last_rand_step = self.last_step

        param_setters_map = get_property_setter_map(self.gym)
        param_setter_defaults_map = get_default_setter_args(self.gym)
        param_getters_map = get_property_getter_map(self.gym)

        # On first iteration, check the number of buckets
        if self.first_randomization:
            check_buckets(self.gym, self.envs, dr_params)

        # --- observation/action noise: build a noise_lambda closure per stream ---
        for nonphysical_param in ["observations", "actions"]:
            if nonphysical_param in dr_params and do_nonenv_randomize:
                dist = dr_params[nonphysical_param]["distribution"]
                op_type = dr_params[nonphysical_param]["operation"]
                sched_type = dr_params[nonphysical_param]["schedule"] if "schedule" in dr_params[
                    nonphysical_param] else None
                sched_step = dr_params[nonphysical_param]["schedule_steps"] if "schedule" in dr_params[
                    nonphysical_param] else None
                op = operator.add if op_type == 'additive' else operator.mul

                # schedule scaling ramps noise in over sched_step frames (linear)
                # or switches it on after sched_step frames (constant)
                if sched_type == 'linear':
                    sched_scaling = 1.0 / sched_step * \
                                    min(self.last_step, sched_step)
                elif sched_type == 'constant':
                    sched_scaling = 0 if self.last_step < sched_step else 1
                else:
                    sched_scaling = 1

                if dist == 'gaussian':
                    mu, var = dr_params[nonphysical_param]["range"]
                    mu_corr, var_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])

                    if op_type == 'additive':
                        mu *= sched_scaling
                        var *= sched_scaling
                        mu_corr *= sched_scaling
                        var_corr *= sched_scaling
                    elif op_type == 'scaling':
                        var = var * sched_scaling  # scale up var over time
                        mu = mu * sched_scaling + 1.0 * \
                             (1.0 - sched_scaling)  # linearly interpolate
                        var_corr = var_corr * sched_scaling  # scale up var over time
                        mu_corr = mu_corr * sched_scaling + 1.0 * \
                                  (1.0 - sched_scaling)  # linearly interpolate

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        # 'corr' is drawn once and cached: correlated noise is
                        # constant across calls, uncorrelated noise is fresh each call
                        params = self.dr_randomizations[param_name]
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * params['var_corr'] + params['mu_corr']
                        return op(
                            tensor, corr + torch.randn_like(tensor) * params['var'] + params['mu'])

                    self.dr_randomizations[nonphysical_param] = {'mu': mu, 'var': var, 'mu_corr': mu_corr,
                                                                 'var_corr': var_corr, 'noise_lambda': noise_lambda}

                elif dist == 'uniform':
                    lo, hi = dr_params[nonphysical_param]["range"]
                    lo_corr, hi_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])

                    if op_type == 'additive':
                        lo *= sched_scaling
                        hi *= sched_scaling
                        lo_corr *= sched_scaling
                        hi_corr *= sched_scaling
                    elif op_type == 'scaling':
                        lo = lo * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi = hi * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        lo_corr = lo_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi_corr = hi_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        # same caching scheme as the gaussian branch above
                        params = self.dr_randomizations[param_name]
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * (params['hi_corr'] - params['lo_corr']) + params['lo_corr']
                        return op(tensor, corr + torch.rand_like(tensor) * (params['hi'] - params['lo']) + params['lo'])

                    self.dr_randomizations[nonphysical_param] = {'lo': lo, 'hi': hi, 'lo_corr': lo_corr,
                                                                 'hi_corr': hi_corr, 'noise_lambda': noise_lambda}

        # --- sim-level parameter randomization ---
        if "sim_params" in dr_params and do_nonenv_randomize:
            prop_attrs = dr_params["sim_params"]
            prop = self.gym.get_sim_params(self.sim)
            if self.first_randomization:
                # snapshot originals so randomization is always relative to the base values
                self.original_props["sim_params"] = {
                    attr: getattr(prop, attr) for attr in dir(prop)}
            for attr, attr_randomization_params in prop_attrs.items():
                apply_random_samples(
                    prop, self.original_props["sim_params"], attr, attr_randomization_params, self.last_step)
            self.gym.set_sim_params(self.sim, prop)

        # If self.actor_params_generator is initialized: use it to
        # sample actor simulation params. This gives users the
        # freedom to generate samples from arbitrary distributions,
        # e.g. use full-covariance distributions instead of the DR's
        # default of treating each simulation parameter independently.
        extern_offsets = {}
        if self.actor_params_generator is not None:
            for env_id in env_ids:
                self.extern_actor_params[env_id] = \
                    self.actor_params_generator.sample()
                extern_offsets[env_id] = 0

        # --- per-actor property randomization for each env due for randomization ---
        for actor, actor_properties in dr_params["actor_params"].items():
            for env_id in env_ids:
                env = self.envs[env_id]
                handle = self.gym.find_actor_handle(env, actor)
                extern_sample = self.extern_actor_params[env_id]

                for prop_name, prop_attrs in actor_properties.items():
                    if prop_name == 'color':
                        num_bodies = self.gym.get_actor_rigid_body_count(
                            env, handle)
                        for n in range(num_bodies):
                            self.gym.set_rigid_body_color(env, handle, n, gymapi.MESH_VISUAL,
                                                          gymapi.Vec3(random.uniform(0, 1), random.uniform(0, 1),
                                                                      random.uniform(0, 1)))
                        continue
                    if prop_name == 'scale':
                        # 'setup_only' attrs are applied only before the sim is prepared
                        setup_only = prop_attrs.get('setup_only', False)
                        if (setup_only and not self.sim_initialized) or not setup_only:
                            attr_randomization_params = prop_attrs
                            sample = generate_random_samples(attr_randomization_params, 1,
                                                             self.last_step, None)
                            og_scale = 1
                            if attr_randomization_params['operation'] == 'scaling':
                                new_scale = og_scale * sample
                            elif attr_randomization_params['operation'] == 'additive':
                                new_scale = og_scale + sample
                            self.gym.set_actor_scale(env, handle, new_scale)
                        continue

                    prop = param_getters_map[prop_name](env, handle)
                    set_random_properties = True
                    if isinstance(prop, list):
                        if self.first_randomization:
                            self.original_props[prop_name] = [
                                {attr: getattr(p, attr) for attr in dir(p)} for p in prop]
                        for p, og_p in zip(prop, self.original_props[prop_name]):
                            for attr, attr_randomization_params in prop_attrs.items():
                                setup_only = attr_randomization_params.get('setup_only', False)
                                if (setup_only and not self.sim_initialized) or not setup_only:
                                    smpl = None
                                    if self.actor_params_generator is not None:
                                        smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                            extern_sample, extern_offsets[env_id], p, attr)
                                    apply_random_samples(
                                        p, og_p, attr, attr_randomization_params,
                                        self.last_step, smpl)
                                else:
                                    set_random_properties = False
                    else:
                        if self.first_randomization:
                            self.original_props[prop_name] = deepcopy(prop)
                        for attr, attr_randomization_params in prop_attrs.items():
                            setup_only = attr_randomization_params.get('setup_only', False)
                            if (setup_only and not self.sim_initialized) or not setup_only:
                                smpl = None
                                if self.actor_params_generator is not None:
                                    smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                        extern_sample, extern_offsets[env_id], prop, attr)
                                apply_random_samples(
                                    prop, self.original_props[prop_name], attr,
                                    attr_randomization_params, self.last_step, smpl)
                            else:
                                set_random_properties = False

                    if set_random_properties:
                        setter = param_setters_map[prop_name]
                        default_args = param_setter_defaults_map[prop_name]
                        setter(env, handle, prop, *default_args)

        if self.actor_params_generator is not None:
            for env_id in env_ids:
                # check that we used all dims in sample
                if extern_offsets[env_id] > 0:
                    extern_sample = self.extern_actor_params[env_id]
                    if extern_offsets[env_id] != extern_sample.shape[0]:
                        print('env_id', env_id,
                              'extern_offset', extern_offsets[env_id],
                              'vs extern_sample.shape', extern_sample.shape)
                        raise Exception("Invalid extern_sample size")

        self.first_randomization = False

================================================
FILE: timechamber/tasks/base/vec_task.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from typing import Dict, Any, Tuple
import gym
from gym import spaces

from isaacgym import gymtorch, gymapi
from isaacgym.torch_utils import to_torch
from isaacgym.gymutil import get_property_setter_map, get_property_getter_map, get_default_setter_args, apply_random_samples, check_buckets, generate_random_samples

import torch
import numpy as np
import operator, random
from copy import deepcopy

import sys

import abc
from abc import ABC

# Handle of the single shared Isaac Gym sim; `_create_sim_once` caches it here.
EXISTING_SIM = None
# Size of the virtual display used for off-screen frame capture.
SCREEN_CAPTURE_RESOLUTION = (1027, 768)


def _create_sim_once(gym, *args, **kwargs):
    """Create the Isaac Gym sim on the first call; every later call returns
    the cached handle unchanged (Isaac Gym supports only one sim per process).
    """
    global EXISTING_SIM
    if EXISTING_SIM is None:
        EXISTING_SIM = gym.create_sim(*args, **kwargs)
    return EXISTING_SIM


class Env(ABC):
    """Abstract vectorized-environment base: parses device/config settings and
    exposes the gym spaces plus buffer-allocation / step / reset hooks."""

    def __init__(self, config: Dict[str, Any], rl_device: str, sim_device: str, graphics_device_id: int,
                 headless: bool):
        """Initialise the env.

        Args:
            config: the configuration dictionary.
            rl_device: device used by the RL algorithm for returned tensors.
            sim_device: the device to simulate physics on. eg. 'cuda:0' or 'cpu'
            graphics_device_id: the device ID to render with.
            headless: Set to False to disable viewer rendering.
        """
        # "cuda:0" -> ("cuda", 0); a bare "cpu"/"cuda" defaults to device 0.
        device_spec = sim_device.split(":")
        self.device_type = device_spec[0]
        self.device_id = int(device_spec[1]) if len(device_spec) > 1 else 0

        self.device = "cpu"
        if config["sim"]["use_gpu_pipeline"]:
            # The GPU pipeline only works when physics itself runs on the GPU;
            # otherwise fall back to CPU and patch the config in place.
            if self.device_type.lower() in ("cuda", "gpu"):
                self.device = f"cuda:{self.device_id}"
            else:
                print("GPU Pipeline can only be used with GPU simulation. Forcing CPU Pipeline.")
                config["sim"]["use_gpu_pipeline"] = False

        self.rl_device = rl_device

        # Rendering
        # if training in a headless mode
        self.headless = headless

        enable_camera_sensors = config.get("enableCameraSensors", False)
        self.graphics_device_id = graphics_device_id
        # No viewer and no camera sensors -> rendering is never needed,
        # so disable the graphics device entirely.
        if enable_camera_sensors == False and self.headless == True:
            self.graphics_device_id = -1

        self.num_environments = config["env"]["numEnvs"]
        self.num_agents = config["env"].get("numAgents", 1)  # used for multi-agent environments
        self.num_observations = config["env"]["numObservations"]
        self.num_states = config["env"].get("numStates", 0)
        self.num_actions = config["env"]["numActions"]
        self.control_freq_inv = config["env"].get("controlFrequencyInv", 1)

        # Unbounded observation/state boxes; actions are clipped to [-1, 1].
        self.obs_space = spaces.Box(np.ones(self.num_obs) * -np.Inf, np.ones(self.num_obs) * np.Inf)
        self.state_space = spaces.Box(np.ones(self.num_states) * -np.Inf, np.ones(self.num_states) * np.Inf)
        self.act_space = spaces.Box(np.ones(self.num_actions) * -1., np.ones(self.num_actions) * 1.)

        self.clip_obs = config["env"].get("clipObservations", np.Inf)
        self.clip_actions = config["env"].get("clipActions", np.Inf)

    @abc.abstractmethod
    def allocate_buffers(self):
        """Create torch buffers for observations, rewards, actions dones and any additional data."""

    @abc.abstractmethod
    def step(self, actions: torch.Tensor) -> Tuple[Dict[str, torch.Tensor], torch.Tensor, torch.Tensor, Dict[str, Any]]:
        """Step the physics of the environment.

        Args:
            actions: actions to apply
        Returns:
            Observations, rewards, resets, info
            Observations are dict of observations (currently only one member called 'obs')
        """

    @abc.abstractmethod
    def reset(self) -> Dict[str, torch.Tensor]:
        """Reset the environment.

        Returns:
            Observation dictionary
        """

    @abc.abstractmethod
    def reset_idx(self, env_ids: torch.Tensor):
        """Reset environments having the provided indices.

        Args:
            env_ids: environments to reset
        """

    @property
    def observation_space(self) -> gym.Space:
        """Get the environment's observation space."""
        return self.obs_space

    @property
    def action_space(self) -> gym.Space:
        """Get the environment's action space."""
        return self.act_space

    @property
    def num_envs(self) -> int:
        """Get the number of environments."""
        return self.num_environments

    @property
    def num_acts(self) -> int:
        """Get the number of actions in the environment."""
        return self.num_actions

    @property
    def num_obs(self) -> int:
        """Get the number of observations in the environment."""
        return self.num_observations
class VecTask(Env):
    # Metadata consumed by gym-style wrappers/recorders.
    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 24}

    def __init__(self, config, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture: bool = False, force_render: bool = False):
        """Initialise the `VecTask`.

        Args:
            config: config dictionary for the environment.
            sim_device: the device to simulate physics on. eg. 'cuda:0' or 'cpu'
            graphics_device_id: the device ID to render with.
            headless: Set to False to disable viewer rendering.
            virtual_screen_capture: Set to True to allow the users get captured screen in RGB array via `env.render(mode='rgb_array')`.
            force_render: Set to True to always force rendering in the steps (if the `control_freq_inv` is greater than 1 we suggest stting this arg to True)
        """
        super().__init__(config, rl_device, sim_device, graphics_device_id, headless)

        self.virtual_screen_capture = virtual_screen_capture
        self.virtual_display = None
        if self.virtual_screen_capture:
            # Imported lazily so pyvirtualdisplay is only required when capture is on.
            from pyvirtualdisplay.smartdisplay import SmartDisplay
            self.virtual_display = SmartDisplay(size=SCREEN_CAPTURE_RESOLUTION)
            self.virtual_display.start()
        self.force_render = force_render

        # NOTE(review): `self.cfg` is read here but not assigned in the visible
        # Env.__init__ — presumably subclasses set self.cfg before calling
        # super().__init__ (MA_Ant_Battle does); verify for other subclasses.
        self.sim_params = self.__parse_sim_params(self.cfg["physics_engine"], self.cfg["sim"])
        if self.cfg["physics_engine"] == "physx":
            self.physics_engine = gymapi.SIM_PHYSX
        elif self.cfg["physics_engine"] == "flex":
            self.physics_engine = gymapi.SIM_FLEX
        else:
            msg = f"Invalid physics engine backend: {self.cfg['physics_engine']}"
            raise ValueError(msg)

        # optimization flags for pytorch JIT
        torch._C._jit_set_profiling_mode(False)
        torch._C._jit_set_profiling_executor(False)

        self.gym = gymapi.acquire_gym()

        # Domain-randomization bookkeeping (see apply_randomizations below).
        self.first_randomization = True
        self.original_props = {}
        self.dr_randomizations = {}
        self.actor_params_generator = None
        self.extern_actor_params = {}
        self.last_step = -1
        self.last_rand_step = -1
        for env_id in range(self.num_envs):
            self.extern_actor_params[env_id] = None

        # create envs, sim and viewer
        self.sim_initialized = False
        # NOTE(review): called with no args, so subclasses are expected to
        # override create_sim() with a zero-argument version (MA_Ant_Battle does).
        self.create_sim()
        self.gym.prepare_sim(self.sim)
        self.sim_initialized = True

        self.set_viewer()
        self.allocate_buffers()

        self.obs_dict = {}

    def set_viewer(self):
        """Create the viewer."""

        # todo: read from config
        self.enable_viewer_sync = True
        self.viewer = None

        # if running with a viewer, set up keyboard shortcuts and camera
        if self.headless == False:
            # subscribe to keyboard shortcuts
            self.viewer = self.gym.create_viewer(
                self.sim, gymapi.CameraProperties())
            self.gym.subscribe_viewer_keyboard_event(
                self.viewer, gymapi.KEY_ESCAPE, "QUIT")
            self.gym.subscribe_viewer_keyboard_event(
                self.viewer, gymapi.KEY_V, "toggle_viewer_sync")

            # set the camera position based on up axis
            sim_params = self.gym.get_sim_params(self.sim)
            if sim_params.up_axis == gymapi.UP_AXIS_Z:
                cam_pos = gymapi.Vec3(20.0, 25.0, 3.0)
                cam_target = gymapi.Vec3(10.0, 15.0, 0.0)
            else:
                cam_pos = gymapi.Vec3(20.0, 3.0, 25.0)
                cam_target = gymapi.Vec3(10.0, 0.0, 15.0)

            self.gym.viewer_camera_look_at(
                self.viewer, None, cam_pos, cam_target)

    def allocate_buffers(self):
        """Allocate the observation, states, etc. buffers.

        These are what is used to set observations and states in the environment classes which
        inherit from this one, and are read in `step` and other related functions.
        """

        # allocate buffers
        self.obs_buf = torch.zeros(
            (self.num_envs, self.num_obs), device=self.device, dtype=torch.float)
        self.states_buf = torch.zeros(
            (self.num_envs, self.num_states), device=self.device, dtype=torch.float)
        self.rew_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.float)
        # Starts at all-ones so every env resets on the very first step.
        self.reset_buf = torch.ones(
            self.num_envs, device=self.device, dtype=torch.long)
        self.timeout_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.progress_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.randomize_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.extras = {}

    def create_sim(self, compute_device: int, graphics_device: int, physics_engine, sim_params: gymapi.SimParams):
        """Create an Isaac Gym sim object.

        Args:
            compute_device: ID of compute device to use.
            graphics_device: ID of graphics device to use.
            physics_engine: physics engine to use (`gymapi.SIM_PHYSX` or `gymapi.SIM_FLEX`)
            sim_params: sim params to use.
        Returns:
            the Isaac Gym sim object.
        """
        sim = _create_sim_once(self.gym, compute_device, graphics_device, physics_engine, sim_params)
        if sim is None:
            print("*** Failed to create sim")
            quit()

        return sim

    def get_state(self):
        """Returns the state buffer of the environment (the privileged observations for asymmetric training)."""
        return torch.clamp(self.states_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)

    @abc.abstractmethod
    def pre_physics_step(self, actions: torch.Tensor):
        """Apply the actions to the environment (eg by setting torques, position targets).

        Args:
            actions: the actions to apply
        """

    @abc.abstractmethod
    def post_physics_step(self):
        """Compute reward and observations, reset any environments that require it."""

    def step(self, actions: torch.Tensor) -> Tuple[Dict[str, torch.Tensor], torch.Tensor, torch.Tensor, Dict[str, Any]]:
        """Step the physics of the environment.

        Args:
            actions: actions to apply
        Returns:
            Observations, rewards, resets, info
            Observations are dict of observations (currently only one member called 'obs')
        """

        # randomize actions
        if self.dr_randomizations.get('actions', None):
            actions = self.dr_randomizations['actions']['noise_lambda'](actions)

        action_tensor = torch.clamp(actions, -self.clip_actions, self.clip_actions)
        # apply actions
        self.pre_physics_step(action_tensor)

        # step physics and render each frame
        for i in range(self.control_freq_inv):
            if self.force_render:
                self.render()
            self.gym.simulate(self.sim)

        # to fix!
        if self.device == 'cpu':
            self.gym.fetch_results(self.sim, True)

        # compute observations, rewards, resets, ...
        self.post_physics_step()

        # fill time out buffer: set to 1 if we reached the max episode length AND the reset buffer is 1. Timeout == 1 makes sense only if the reset buffer is 1.
        # NOTE(review): `self.max_episode_length` is not set in this class —
        # presumably defined by subclasses; verify before using VecTask directly.
        self.timeout_buf = (self.progress_buf >= self.max_episode_length - 1) & (self.reset_buf != 0)

        # randomize observations
        if self.dr_randomizations.get('observations', None):
            self.obs_buf = self.dr_randomizations['observations']['noise_lambda'](self.obs_buf)

        self.extras["time_outs"] = self.timeout_buf.to(self.rl_device)

        self.obs_dict["obs"] = torch.clamp(self.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)

        # asymmetric actor-critic
        if self.num_states > 0:
            self.obs_dict["states"] = self.get_state()

        return self.obs_dict, self.rew_buf.to(self.rl_device), self.reset_buf.to(self.rl_device), self.extras

    def zero_actions(self) -> torch.Tensor:
        """Returns a buffer with zero actions.

        Returns:
            A buffer of zero torch actions
        """
        actions = torch.zeros([self.num_envs, self.num_actions], dtype=torch.float32, device=self.rl_device)

        return actions

    def reset_idx(self, env_idx):
        """Reset environment with indces in env_idx.
        Should be implemented in an environment class inherited from VecTask.
        """
        pass

    def reset(self):
        """Is called only once when environment starts to provide the first observations.
        Doesn't calculate observations. Actual reset and observation calculation need to be implemented by user.
        Returns:
            Observation dictionary
        """
        self.obs_dict["obs"] = torch.clamp(self.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)

        # asymmetric actor-critic
        if self.num_states > 0:
            self.obs_dict["states"] = self.get_state()

        return self.obs_dict

    def reset_done(self):
        """Reset the environment.
        Returns:
            Observation dictionary, indices of environments being reset
        """
        done_env_ids = self.reset_buf.nonzero(as_tuple=False).flatten()
        if len(done_env_ids) > 0:
            self.reset_idx(done_env_ids)

        self.obs_dict["obs"] = torch.clamp(self.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)

        # asymmetric actor-critic
        if self.num_states > 0:
            self.obs_dict["states"] = self.get_state()

        return self.obs_dict, done_env_ids

    def render(self, mode="rgb_array"):
        """Draw the frame to the viewer, and check for keyboard events."""
        if self.viewer:
            # check for window closed
            if self.gym.query_viewer_has_closed(self.viewer):
                sys.exit()

            # check for keyboard events
            for evt in self.gym.query_viewer_action_events(self.viewer):
                if evt.action == "QUIT" and evt.value > 0:
                    sys.exit()
                elif evt.action == "toggle_viewer_sync" and evt.value > 0:
                    self.enable_viewer_sync = not self.enable_viewer_sync

            # fetch results
            if self.device != 'cpu':
                self.gym.fetch_results(self.sim, True)

            # step graphics
            if self.enable_viewer_sync:
                self.gym.step_graphics(self.sim)
                self.gym.draw_viewer(self.viewer, self.sim, True)

                # Wait for dt to elapse in real time.
                # This synchronizes the physics simulation with the rendering rate.
                self.gym.sync_frame_time(self.sim)
            else:
                # Keep processing window events even when sync is toggled off.
                self.gym.poll_viewer_events(self.viewer)

        if self.virtual_display and mode == "rgb_array":
            img = self.virtual_display.grab()
            return np.array(img)

    def __parse_sim_params(self, physics_engine: str, config_sim: Dict[str, Any]) -> gymapi.SimParams:
        """Parse the config dictionary for physics stepping settings.

        Args:
            physics_engine: which physics engine to use. "physx" or "flex"
            config_sim: dict of sim configuration parameters
        Returns
            IsaacGym SimParams object with updated settings.
        """
        sim_params = gymapi.SimParams()

        # check correct up-axis
        if config_sim["up_axis"] not in ["z", "y"]:
            msg = f"Invalid physics up-axis: {config_sim['up_axis']}"
            print(msg)
            raise ValueError(msg)

        # assign general sim parameters
        sim_params.dt = config_sim["dt"]
        sim_params.num_client_threads = config_sim.get("num_client_threads", 0)
        sim_params.use_gpu_pipeline = config_sim["use_gpu_pipeline"]
        sim_params.substeps = config_sim.get("substeps", 2)

        # assign up-axis
        if config_sim["up_axis"] == "z":
            sim_params.up_axis = gymapi.UP_AXIS_Z
        else:
            sim_params.up_axis = gymapi.UP_AXIS_Y

        # assign gravity
        sim_params.gravity = gymapi.Vec3(*config_sim["gravity"])

        # configure physics parameters
        if physics_engine == "physx":
            # set the parameters
            if "physx" in config_sim:
                for opt in config_sim["physx"].keys():
                    if opt == "contact_collection":
                        # This one is an enum, not a plain scalar.
                        setattr(sim_params.physx, opt, gymapi.ContactCollection(config_sim["physx"][opt]))
                    else:
                        setattr(sim_params.physx, opt, config_sim["physx"][opt])
        else:
            # set the parameters
            if "flex" in config_sim:
                for opt in config_sim["flex"].keys():
                    setattr(sim_params.flex, opt, config_sim["flex"][opt])

        # return the configured params
        return sim_params

    """
    Domain Randomization methods
    """

    def get_actor_params_info(self, dr_params: Dict[str, Any], env):
        """Generate a flat array of actor params, their names and ranges.

        Returns:
            The array
        """

        if "actor_params" not in dr_params:
            return None
        params = []
        names = []
        lows = []
        highs = []
        param_getters_map = get_property_getter_map(self.gym)
        for actor, actor_properties in dr_params["actor_params"].items():
            handle = self.gym.find_actor_handle(env, actor)
            for prop_name, prop_attrs in actor_properties.items():
                if prop_name == 'color':
                    continue  # this is set randomly
                props = param_getters_map[prop_name](env, handle)
                if not isinstance(props, list):
                    props = [props]
                for prop_idx, prop in enumerate(props):
                    for attr, attr_randomization_params in prop_attrs.items():
                        name = prop_name+'_' + str(prop_idx) + '_'+attr
                        lo_hi = attr_randomization_params['range']
                        distr = attr_randomization_params['distribution']
                        # Only uniform distributions have a bounded range.
                        if 'uniform' not in distr:
                            lo_hi = (-1.0*float('Inf'), float('Inf'))
                        if isinstance(prop, np.ndarray):
                            # presumably a structured array keyed by attr — TODO confirm
                            for attr_idx in range(prop[attr].shape[0]):
                                params.append(prop[attr][attr_idx])
                                names.append(name+'_'+str(attr_idx))
                                lows.append(lo_hi[0])
                                highs.append(lo_hi[1])
                        else:
                            params.append(getattr(prop, attr))
                            names.append(name)
                            lows.append(lo_hi[0])
                            highs.append(lo_hi[1])
        return params, names, lows, highs

    def apply_randomizations(self, dr_params):
        """Apply domain randomizations to the environment.

        Note that currently we can only apply randomizations only on resets, due to current PhysX limitations

        Args:
            dr_params: parameters for domain randomization to use.
        """

        # If we don't have a randomization frequency, randomize every step
        rand_freq = dr_params.get("frequency", 1)

        # First, determine what to randomize:
        #   - non-environment parameters when > frequency steps have passed since the last non-environment
        #   - physical environments in the reset buffer, which have exceeded the randomization frequency threshold
        #   - on the first call, randomize everything
        self.last_step = self.gym.get_frame_count(self.sim)
        if self.first_randomization:
            do_nonenv_randomize = True
            env_ids = list(range(self.num_envs))
        else:
            do_nonenv_randomize = (self.last_step - self.last_rand_step) >= rand_freq
            rand_envs = torch.where(self.randomize_buf >= rand_freq, torch.ones_like(self.randomize_buf),
                                    torch.zeros_like(self.randomize_buf))
            rand_envs = torch.logical_and(rand_envs, self.reset_buf)
            env_ids = torch.nonzero(rand_envs, as_tuple=False).squeeze(-1).tolist()
            self.randomize_buf[rand_envs] = 0

        if do_nonenv_randomize:
            self.last_rand_step = self.last_step

        param_setters_map = get_property_setter_map(self.gym)
        param_setter_defaults_map = get_default_setter_args(self.gym)
        param_getters_map = get_property_getter_map(self.gym)

        # On first iteration, check the number of buckets
        if self.first_randomization:
            check_buckets(self.gym, self.envs, dr_params)

        for nonphysical_param in ["observations", "actions"]:
            if nonphysical_param in dr_params and do_nonenv_randomize:
                dist = dr_params[nonphysical_param]["distribution"]
                op_type = dr_params[nonphysical_param]["operation"]
                sched_type = dr_params[nonphysical_param]["schedule"] if "schedule" in dr_params[nonphysical_param] else None
                sched_step = dr_params[nonphysical_param]["schedule_steps"] if "schedule" in dr_params[nonphysical_param] else None
                op = operator.add if op_type == 'additive' else operator.mul

                # sched_scaling ramps the noise in over training steps.
                if sched_type == 'linear':
                    sched_scaling = 1.0 / sched_step * \
                        min(self.last_step, sched_step)
                elif sched_type == 'constant':
                    sched_scaling = 0 if self.last_step < sched_step else 1
                else:
                    sched_scaling = 1

                if dist == 'gaussian':
                    mu, var = dr_params[nonphysical_param]["range"]
                    mu_corr, var_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])

                    if op_type == 'additive':
                        mu *= sched_scaling
                        var *= sched_scaling
                        mu_corr *= sched_scaling
                        var_corr *= sched_scaling
                    elif op_type == 'scaling':
                        var = var * sched_scaling  # scale up var over time
                        mu = mu * sched_scaling + 1.0 * \
                            (1.0 - sched_scaling)  # linearly interpolate

                        var_corr = var_corr * sched_scaling  # scale up var over time
                        mu_corr = mu_corr * sched_scaling + 1.0 * \
                            (1.0 - sched_scaling)  # linearly interpolate

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        params = self.dr_randomizations[param_name]
                        # 'corr' is sampled once and reused: correlated noise
                        # stays fixed until the next randomization.
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * params['var_corr'] + params['mu_corr']
                        return op(
                            tensor, corr + torch.randn_like(tensor) * params['var'] + params['mu'])

                    self.dr_randomizations[nonphysical_param] = {'mu': mu, 'var': var, 'mu_corr': mu_corr, 'var_corr': var_corr, 'noise_lambda': noise_lambda}

                elif dist == 'uniform':
                    lo, hi = dr_params[nonphysical_param]["range"]
                    lo_corr, hi_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.])

                    if op_type == 'additive':
                        lo *= sched_scaling
                        hi *= sched_scaling
                        lo_corr *= sched_scaling
                        hi_corr *= sched_scaling
                    elif op_type == 'scaling':
                        lo = lo * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi = hi * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        lo_corr = lo_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)
                        hi_corr = hi_corr * sched_scaling + 1.0 * (1.0 - sched_scaling)

                    def noise_lambda(tensor, param_name=nonphysical_param):
                        params = self.dr_randomizations[param_name]
                        corr = params.get('corr', None)
                        if corr is None:
                            corr = torch.randn_like(tensor)
                            params['corr'] = corr
                        corr = corr * (params['hi_corr'] - params['lo_corr']) + params['lo_corr']
                        return op(tensor, corr + torch.rand_like(tensor) * (params['hi'] - params['lo']) + params['lo'])

                    self.dr_randomizations[nonphysical_param] = {'lo': lo, 'hi': hi, 'lo_corr': lo_corr, 'hi_corr': hi_corr, 'noise_lambda': noise_lambda}

        if "sim_params" in dr_params and do_nonenv_randomize:
            prop_attrs = dr_params["sim_params"]
            prop = self.gym.get_sim_params(self.sim)

            if self.first_randomization:
                self.original_props["sim_params"] = {
                    attr: getattr(prop, attr) for attr in dir(prop)}

            for attr, attr_randomization_params in prop_attrs.items():
                apply_random_samples(
                    prop, self.original_props["sim_params"], attr, attr_randomization_params, self.last_step)

            self.gym.set_sim_params(self.sim, prop)

        # If self.actor_params_generator is initialized: use it to
        # sample actor simulation params. This gives users the
        # freedom to generate samples from arbitrary distributions,
        # e.g. use full-covariance distributions instead of the DR's
        # default of treating each simulation parameter independently.
        extern_offsets = {}
        if self.actor_params_generator is not None:
            for env_id in env_ids:
                self.extern_actor_params[env_id] = \
                    self.actor_params_generator.sample()
                extern_offsets[env_id] = 0

        for actor, actor_properties in dr_params["actor_params"].items():
            for env_id in env_ids:
                env = self.envs[env_id]
                handle = self.gym.find_actor_handle(env, actor)
                extern_sample = self.extern_actor_params[env_id]

                for prop_name, prop_attrs in actor_properties.items():
                    if prop_name == 'color':
                        num_bodies = self.gym.get_actor_rigid_body_count(
                            env, handle)
                        for n in range(num_bodies):
                            self.gym.set_rigid_body_color(env, handle, n, gymapi.MESH_VISUAL,
                                                          gymapi.Vec3(random.uniform(0, 1), random.uniform(0, 1),
                                                                      random.uniform(0, 1)))
                        continue

                    if prop_name == 'scale':
                        setup_only = prop_attrs.get('setup_only', False)
                        if (setup_only and not self.sim_initialized) or not setup_only:
                            attr_randomization_params = prop_attrs
                            sample = generate_random_samples(attr_randomization_params, 1,
                                                             self.last_step, None)
                            og_scale = 1
                            if attr_randomization_params['operation'] == 'scaling':
                                new_scale = og_scale * sample
                            elif attr_randomization_params['operation'] == 'additive':
                                new_scale = og_scale + sample
                            self.gym.set_actor_scale(env, handle, new_scale)
                        continue

                    prop = param_getters_map[prop_name](env, handle)
                    set_random_properties = True

                    if isinstance(prop, list):
                        # Per-body/per-shape properties: snapshot originals once,
                        # then randomize each element relative to its original.
                        if self.first_randomization:
                            self.original_props[prop_name] = [
                                {attr: getattr(p, attr) for attr in dir(p)} for p in prop]
                        for p, og_p in zip(prop, self.original_props[prop_name]):
                            for attr, attr_randomization_params in prop_attrs.items():
                                setup_only = attr_randomization_params.get('setup_only', False)
                                if (setup_only and not self.sim_initialized) or not setup_only:
                                    smpl = None
                                    if self.actor_params_generator is not None:
                                        # NOTE(review): get_attr_val_from_sample is neither
                                        # imported nor defined in the visible part of this
                                        # file — verify it exists, else this path raises
                                        # NameError when actor_params_generator is set.
                                        smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                            extern_sample, extern_offsets[env_id], p, attr)
                                    apply_random_samples(
                                        p, og_p, attr, attr_randomization_params,
                                        self.last_step, smpl)
                                else:
                                    set_random_properties = False
                    else:
                        # Single property object (e.g. rigid body props for the actor).
                        if self.first_randomization:
                            self.original_props[prop_name] = deepcopy(prop)
                        for attr, attr_randomization_params in prop_attrs.items():
                            setup_only = attr_randomization_params.get('setup_only', False)
                            if (setup_only and not self.sim_initialized) or not setup_only:
                                smpl = None
                                if self.actor_params_generator is not None:
                                    smpl, extern_offsets[env_id] = get_attr_val_from_sample(
                                        extern_sample, extern_offsets[env_id], prop, attr)
                                apply_random_samples(
                                    prop, self.original_props[prop_name], attr,
                                    attr_randomization_params, self.last_step, smpl)
                            else:
                                set_random_properties = False

                    if set_random_properties:
                        setter = param_setters_map[prop_name]
                        default_args = param_setter_defaults_map[prop_name]
                        setter(env, handle, prop, *default_args)

        if self.actor_params_generator is not None:
            for env_id in env_ids:  # check that we used all dims in sample
                if extern_offsets[env_id] > 0:
                    extern_sample = self.extern_actor_params[env_id]
                    if extern_offsets[env_id] != extern_sample.shape[0]:
                        print('env_id', env_id,
                              'extern_offset', extern_offsets[env_id],
                              'vs extern_sample.shape', extern_sample.shape)
                        raise Exception("Invalid extern_sample size")

        self.first_randomization = False
self.first_randomization = False ================================================ FILE: timechamber/tasks/data/assets/mjcf/amp_humanoid_sword_shield.xml ================================================ ================================================ FILE: timechamber/tasks/data/models/llc_reallusion_sword_shield.pth ================================================ [File too large to display: 80.6 MB] ================================================ FILE: timechamber/tasks/data/motions/reallusion_sword_shield/README.txt ================================================ This motion data is provided courtesy of Reallusion, strictly for noncommercial use. The original motion data is available at: https://actorcore.reallusion.com/motion/pack/studio-mocap-sword-and-shield-stunts https://actorcore.reallusion.com/motion/pack/studio-mocap-sword-and-shield-moves ================================================ FILE: timechamber/tasks/data/motions/reallusion_sword_shield/dataset_reallusion_sword_shield.yaml ================================================ motions: - file: "RL_Avatar_Atk_2xCombo01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_2xCombo02_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_2xCombo03_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_2xCombo04_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_2xCombo05_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo02_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo03_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo04_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo05_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo06_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_3xCombo07_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_4xCombo01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_4xCombo02_Motion.npy" weight: 0.00724638 - file: 
"RL_Avatar_Atk_4xCombo03_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_SlashDown_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_SlashLeft_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_SlashRight_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_SlashUp_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_Spin_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_Stab_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Counter_Atk01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Counter_Atk02_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Counter_Atk03_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Counter_Atk04_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Kill_2xCombo01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Kill_2xCombo02_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Kill_3xCombo01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Kill_3xCombo02_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Kill_4xCombo01_Motion.npy" weight: 0.00724638 - file: "RL_Avatar_Atk_Jump_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Atk_Kick_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Atk_ShieldCharge_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Atk_ShieldSwipe01_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Atk_ShieldSwipe02_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Counter_Atk05_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Standoff_Feint_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Dodge_Backward_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_RunBackward_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_WalkBackward01_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_WalkBackward02_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_Dodgle_Left_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_RunLeft_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_WalkLeft01_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_WalkLeft02_Motion.npy" weight: 0.01552795 - file: 
"RL_Avatar_Dodgle_Right_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_RunRight_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_WalkRight01_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_WalkRight02_Motion.npy" weight: 0.01552795 - file: "RL_Avatar_RunForward_Motion.npy" weight: 0.02070393 - file: "RL_Avatar_WalkForward01_Motion.npy" weight: 0.02070393 - file: "RL_Avatar_WalkForward02_Motion.npy" weight: 0.02070393 - file: "RL_Avatar_Standoff_Circle_Motion.npy" weight: 0.06211180 - file: "RL_Avatar_TurnLeft90_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_TurnLeft180_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_TurnRight90_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_TurnRight180_Motion.npy" weight: 0.03105590 - file: "RL_Avatar_Fall_Backward_Motion.npy" weight: 0.00869565 - file: "RL_Avatar_Fall_Left_Motion.npy" weight: 0.00869565 - file: "RL_Avatar_Fall_Right_Motion.npy" weight: 0.00869565 - file: "RL_Avatar_Fall_SpinLeft_Motion.npy" weight: 0.00869565 - file: "RL_Avatar_Fall_SpinRight_Motion.npy" weight: 0.00869565 - file: "RL_Avatar_Idle_Alert(0)_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Idle_Alert_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Idle_Battle(0)_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Idle_Battle_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Idle_Ready(0)_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Idle_Ready_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Standoff_Swing_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Taunt_PoundChest_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Taunt_Roar_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Taunt_ShieldKnock_Motion.npy" weight: 0.00434783 - file: "RL_Avatar_Shield_BlockBackward_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Shield_BlockCrouch_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Shield_BlockDown_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Shield_BlockLeft_Motion.npy" weight: 0.00289855 - file: 
"RL_Avatar_Shield_BlockRight_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Shield_BlockUp_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryBackward01_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryBackward02_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryBackward03_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryBackward04_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryCrouch_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryDown_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryLeft_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryRight_Motion.npy" weight: 0.00289855 - file: "RL_Avatar_Sword_ParryUp_Motion.npy" weight: 0.00289855 ================================================ FILE: timechamber/tasks/ma_ant_battle.py ================================================ from typing import Tuple import os import torch from isaacgym import gymtorch from isaacgym.gymtorch import * from timechamber.utils.torch_jit_utils import * from .base.ma_vec_task import MA_VecTask class MA_Ant_Battle(MA_VecTask): def __init__(self, cfg, sim_device, rl_device, graphics_device_id, headless, virtual_screen_capture, force_render): self.extras = None self.cfg = cfg self.randomization_params = self.cfg["task"]["randomization_params"] self.randomize = self.cfg["task"]["randomize"] self.max_episode_length = self.cfg["env"]["episodeLength"] self.termination_height = self.cfg["env"]["terminationHeight"] self.plane_static_friction = self.cfg["env"]["plane"]["staticFriction"] self.plane_dynamic_friction = self.cfg["env"]["plane"]["dynamicFriction"] self.plane_restitution = self.cfg["env"]["plane"]["restitution"] self.action_scale = self.cfg["env"]["control"]["actionScale"] self.joints_at_limit_cost_scale = self.cfg["env"]["jointsAtLimitCost"] self.dof_vel_scale = self.cfg["env"]["dofVelocityScale"] self.ant_agents_state = [] self.win_reward_scale = 2000 self.move_to_op_reward_scale = 1. 
# NOTE(review): this span is the continuation of MA_Ant_Battle.__init__ — the
# `def __init__` line lies earlier in the file; indentation below assumes the
# enclosing class body. `_create_envs` at the end continues past this chunk.

        # Reward shaping coefficients (hard-coded here rather than read from cfg).
        self.stay_in_center_reward_scale = 0.2
        self.action_cost_scale = -0.000025
        self.push_scale = 1.
        self.dense_reward_scale = 1.0
        self.hp_decay_scale = 1.

        # PD controller gains from the task config.
        self.Kp = self.cfg["env"]["control"]["stiffness"]
        self.Kd = self.cfg["env"]["control"]["damping"]

        # 32 self-observations plus 27 per opponent agent.
        self.cfg["env"]["numObservations"] = 32 + 27 * (self.cfg["env"].get("numAgents", 1) - 1)
        self.cfg["env"]["numActions"] = 8

        # Arena radius; shrinks by one unit per step over an episode.
        self.borderline_space = cfg["env"]["borderlineSpace"]
        self.borderline_space_unit = self.borderline_space / self.max_episode_length
        self.ant_body_colors = [gymapi.Vec3(*rgb_arr) for rgb_arr in self.cfg["env"]["color"]]

        super().__init__(config=self.cfg, sim_device=sim_device, rl_device=rl_device,
                         graphics_device_id=graphics_device_id, headless=headless)

        self.use_central_value = False
        self.obs_idxs = torch.eye(4, dtype=torch.float32, device=self.device)

        if self.viewer is not None:
            # Draw the circular arena border in every env and place the camera.
            for i, env in enumerate(self.envs):
                self._add_circle_borderline(env, self.borderline_space)
            cam_pos = gymapi.Vec3(15.0, 0.0, 3.4)
            cam_target = gymapi.Vec3(10.0, 0.0, 0.0)
            self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)

        # get gym GPU state tensors
        actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim)
        dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim)
        sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim)

        # presumably 4 force sensors (feet) per ant, 6 values each — TODO confirm
        sensors_per_env = 4
        self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, sensors_per_env * 6)

        self.gym.refresh_dof_state_tensor(self.sim)
        self.gym.refresh_actor_root_state_tensor(self.sim)

        # Aliased views into the simulator's root-state buffer; writes propagate.
        self.root_states = gymtorch.wrap_tensor(actor_root_state)
        print(f'root_states:{self.root_states.shape}')
        self.initial_root_states = self.root_states.clone()
        self.initial_root_states[:, 7:13] = 0  # set lin_vel and ang_vel to 0

        # create some wrapper tensors for different slices
        self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)
        print(f'dof:{self.dof_state.shape}')
        dof_state_shaped = self.dof_state.view(self.num_envs, -1, 2)
        # Per-agent views: actors are interleaved, so agent idx owns rows idx::num_agents.
        for idx in range(self.num_agents):
            ant_root_state = self.root_states[idx::self.num_agents]
            ant_dof_pos = dof_state_shaped[:, idx * self.num_dof:(idx + 1) * self.num_dof, 0]
            ant_dof_vel = dof_state_shaped[:, idx * self.num_dof:(idx + 1) * self.num_dof, 1]
            self.ant_agents_state.append((ant_root_state, ant_dof_pos, ant_dof_vel))

        # Clamp the zero pose into the DOF limits where zero is out of range.
        self.initial_dof_pos = torch.zeros_like(self.ant_agents_state[0][1], device=self.device, dtype=torch.float)
        zero_tensor = torch.tensor([0.0], device=self.device)
        self.initial_dof_pos = torch.where(self.dof_limits_lower > zero_tensor, self.dof_limits_lower,
                                           torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper,
                                                       self.initial_dof_pos))
        self.initial_dof_vel = torch.zeros_like(self.ant_agents_state[0][2], device=self.device, dtype=torch.float)

        self.dt = self.cfg["sim"]["dt"]

        torques = self.gym.acquire_dof_force_tensor(self.sim)
        self.torques = gymtorch.wrap_tensor(torques).view(self.num_envs, self.num_agents * self.num_dof)

        # Per-actor world-frame basis vectors used by heading/orientation math.
        self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat(
            (self.num_agents * self.num_envs, 1))
        self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat(
            (self.num_agents * self.num_envs, 1))
        self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat(
            (self.num_agents * self.num_envs, 1))

    def allocate_buffers(self):
        # Overrides VecTask.allocate_buffers: observations are per-agent
        # (num_agents * num_envs rows) while rewards/resets are per-env.
        self.obs_buf = torch.zeros((self.num_agents * self.num_envs, self.num_obs), device=self.device,
                                   dtype=torch.float)
        self.rew_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.float)
        self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long)
        self.timeout_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.progress_buf = torch.zeros(
            self.num_envs, device=self.device, dtype=torch.long)
        self.randomize_buf = torch.zeros(
            self.num_envs * self.num_agents, device=self.device, dtype=torch.long)
        # Match-outcome bookkeeping surfaced to the self-play trainer.
        self.extras = {'ranks': torch.zeros((self.num_envs, self.num_agents), device=self.device, dtype=torch.long),
                       'win': torch.zeros((self.num_envs * (self.num_agents - 1),), device=self.device,
                                          dtype=torch.bool),
                       'lose': torch.zeros((self.num_envs * (self.num_agents - 1),), device=self.device,
                                           dtype=torch.bool),
                       'draw': torch.zeros((self.num_envs * (self.num_agents - 1),), device=self.device,
                                           dtype=torch.bool)}

    def create_sim(self):
        # Zero-argument override expected by VecTask.__init__'s self.create_sim() call.
        self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, 'z')
        self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)

        # Precompute the unit-circle line segments used to draw the arena border
        # (scaled by radius in _add_circle_borderline).
        lines = []
        borderline_height = 0.01
        for height in range(20):
            for angle in range(360):
                begin_point = [np.cos(np.radians(angle)), np.sin(np.radians(angle)), borderline_height * height]
                end_point = [np.cos(np.radians(angle + 1)), np.sin(np.radians(angle + 1)), borderline_height * height]
                lines.append(begin_point)
                lines.append(end_point)
        self.lines = np.array(lines, dtype=np.float32)

        self._create_ground_plane()
        print(f'num envs {self.num_envs} env spacing {self.cfg["env"]["envSpacing"]}')
        self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs)))

        # If randomizing, apply once immediately on startup before the fist sim step
        if self.randomize:
            self.apply_randomizations(self.randomization_params)

    def _add_circle_borderline(self, env, radius):
        # Scale the precomputed unit circle to `radius` and draw it in red.
        lines = self.lines * radius
        colors = np.array([[1, 0, 0]] * (len(lines) // 2), dtype=np.float32)
        self.gym.add_lines(self.viewer, env, len(lines) // 2, lines, colors)

    def _create_ground_plane(self):
        plane_params = gymapi.PlaneParams()
        plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
        plane_params.static_friction = self.plane_static_friction
        plane_params.dynamic_friction = self.plane_dynamic_friction
        self.gym.add_ground(self.sim, plane_params)

    def _create_envs(self, num_envs, spacing, num_per_row):
        # NOTE(review): this method continues beyond the visible chunk.
        lower = gymapi.Vec3(-spacing, -spacing, 0.0)
        upper = gymapi.Vec3(spacing, spacing, spacing)

        asset_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../assets')
asset_file = "mjcf/nv_ant.xml" if "asset" in self.cfg["env"]: asset_file = self.cfg["env"]["asset"].get("assetFileName", asset_file) asset_path = os.path.join(asset_root, asset_file) asset_root = os.path.dirname(asset_path) asset_file = os.path.basename(asset_path) asset_options = gymapi.AssetOptions() # Note - DOF mode is set in the MJCF file and loaded by Isaac Gym asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE asset_options.angular_damping = 0.0 ant_assets = [] for _ in range(self.num_agents): ant_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) ant_assets.append(ant_asset) dof_props = self.gym.get_asset_dof_properties(ant_assets[0]) self.num_dof = self.gym.get_asset_dof_count(ant_assets[0]) self.num_bodies = self.gym.get_asset_rigid_body_count(ant_assets[0]) for i in range(self.num_dof): dof_props['driveMode'][i] = gymapi.DOF_MODE_POS dof_props['stiffness'][i] = self.Kp dof_props['damping'][i] = self.Kd start_pose = gymapi.Transform() start_pose.p = gymapi.Vec3(-self.borderline_space + 1, -self.borderline_space + 1, 1.) 
self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w], device=self.device) self.torso_index = 0 self.num_bodies = self.gym.get_asset_rigid_body_count(ant_assets[0]) body_names = [self.gym.get_asset_rigid_body_name(ant_assets[0], i) for i in range(self.num_bodies)] extremity_names = [s for s in body_names if "foot" in s] self.extremities_index = torch.zeros(len(extremity_names), dtype=torch.long, device=self.device) print(body_names, extremity_names, self.extremities_index) # create force sensors attached to the "feet" extremity_indices = [self.gym.find_asset_rigid_body_index(ant_assets[0], name) for name in extremity_names] sensor_pose = gymapi.Transform() for body_idx in extremity_indices: self.gym.create_asset_force_sensor(ant_assets[0], body_idx, sensor_pose) self.ant_handles = [] self.actor_indices = [] self.envs = [] self.dof_limits_lower = [] self.dof_limits_upper = [] for i in range(self.num_envs): # create env instance env_ptr = self.gym.create_env( self.sim, lower, upper, num_per_row ) # create actor instance for j in range(self.num_agents): ant_handle = self.gym.create_actor(env_ptr, ant_assets[j], start_pose, "ant_" + str(j), i, -1, 0) actor_index = self.gym.get_actor_index(env_ptr, ant_handle, gymapi.DOMAIN_SIM) self.gym.set_actor_dof_properties(env_ptr, ant_handle, dof_props) self.actor_indices.append(actor_index) self.gym.enable_actor_dof_force_sensors(env_ptr, ant_handle) self.ant_handles.append(ant_handle) for k in range(self.num_bodies): self.gym.set_rigid_body_color( env_ptr, ant_handle, k, gymapi.MESH_VISUAL, self.ant_body_colors[j]) self.envs.append(env_ptr) dof_prop = self.gym.get_actor_dof_properties(self.envs[0], self.ant_handles[0]) for j in range(self.num_dof): if dof_prop['lower'][j] > dof_prop['upper'][j]: self.dof_limits_lower.append(dof_prop['upper'][j]) self.dof_limits_upper.append(dof_prop['lower'][j]) else: self.dof_limits_lower.append(dof_prop['lower'][j]) 
self.dof_limits_upper.append(dof_prop['upper'][j]) self.dof_limits_lower = to_torch(self.dof_limits_lower, device=self.device) self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device) self.actor_indices = to_torch(self.actor_indices, device=self.device).to(dtype=torch.int32) for i in range(len(extremity_names)): self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0], extremity_names[i]) def compute_reward(self, actions): self.rew_buf[:], self.reset_buf[:], self.extras['ranks'][:], self.extras['win'], self.extras['lose'], \ self.extras[ 'draw'] = compute_ant_reward( self.obs_buf, self.reset_buf, self.progress_buf, self.torques, self.extras['ranks'], self.termination_height, self.max_episode_length, self.borderline_space, self.borderline_space_unit, self.win_reward_scale, self.stay_in_center_reward_scale, self.action_cost_scale, self.push_scale, self.joints_at_limit_cost_scale, self.dense_reward_scale, self.dt, self.num_agents ) def compute_observations(self): self.gym.refresh_dof_state_tensor(self.sim) self.gym.refresh_actor_root_state_tensor(self.sim) self.gym.refresh_force_sensor_tensor(self.sim) self.gym.refresh_dof_force_tensor(self.sim) for agent_idx in range(self.num_agents): self.obs_buf[agent_idx * self.num_envs:(agent_idx + 1) * self.num_envs, :] = compute_ant_observations( self.ant_agents_state, self.progress_buf, self.dof_limits_lower, self.dof_limits_upper, self.dof_vel_scale, self.termination_height, self.borderline_space_unit, self.borderline_space, self.num_agents, agent_idx, ) def reset_idx(self, env_ids): # print('reset.....', env_ids) # Randomization can happen only at reset time, since it can reset actor positions on GPU if self.randomize: self.apply_randomizations(self.randomization_params) positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=self.device) velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=self.device) for agent_idx 
in range(self.num_agents): root_state, dof_pos, dof_vel = self.ant_agents_state[agent_idx] dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower, self.dof_limits_upper) dof_vel[env_ids] = velocities agent_env_ids = expand_env_ids(env_ids, self.num_agents) env_ids_int32 = self.actor_indices[agent_env_ids] rand_angle = torch.rand((len(env_ids),), device=self.device) * torch.pi * 2 # generate angle in 0-360 rand_pos = (self.borderline_space * torch.ones((len(agent_env_ids), 2), device=self.device) - torch.rand((len(agent_env_ids), 2), device=self.device)) unit_angle = 2 * torch.pi / self.num_agents for agent_idx in range(self.num_agents): rand_pos[agent_idx::self.num_agents, 0] *= torch.cos(rand_angle + agent_idx * unit_angle) rand_pos[agent_idx::self.num_agents, 1] *= torch.sin(rand_angle + agent_idx * unit_angle) rand_floats = torch_rand_float(-1.0, 1.0, (len(agent_env_ids), 1), device=self.device) rand_rotation = quat_from_angle_axis(rand_floats[:, 0] * np.pi, self.z_unit_tensor[agent_env_ids]) self.root_states[agent_env_ids] = self.initial_root_states[agent_env_ids] self.root_states[agent_env_ids, :2] = rand_pos self.root_states[agent_env_ids, 3:7] = rand_rotation self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self.root_states), gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) self.gym.set_dof_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self.dof_state), gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) self.progress_buf[env_ids] = 0 self.reset_buf[env_ids] = 0 self.extras['ranks'][env_ids] = 0 def pre_physics_step(self, actions): # actions.shape = [num_envs * num_agents, num_actions], stacked as followed: # {[(agent1_act_1, agent1_act2)|(agent2_act1, agent2_act2)|...]_(env0), # [(agent1_act_1, agent1_act2)|(agent2_act1, agent2_act2)|...]_(env1), # ... 
} self.actions = torch.tensor([], device=self.device) for agent_idx in range(self.num_agents): self.actions = torch.cat((self.actions, actions[agent_idx * self.num_envs:(agent_idx + 1) * self.num_envs]), dim=-1) tmp_actions = self.extras['ranks'].unsqueeze(-1).repeat_interleave(self.num_actions, dim=-1).view(self.num_envs, self.num_actions * self.num_agents) zero_actions = torch.zeros_like(tmp_actions, dtype=torch.float) self.actions = torch.where(tmp_actions > 0, zero_actions, self.actions) # reshape [num_envs * num_agents, num_actions] to [num_envs, num_agents * num_actions] print(f'action_size{ targets = self.actions self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(targets)) def post_physics_step(self): self.progress_buf += 1 self.randomize_buf += 1 resets = self.reset_buf.reshape(self.num_envs, 1).sum(dim=1) # print(resets) env_ids = (resets == 1).nonzero(as_tuple=False).flatten() if len(env_ids) > 0: self.reset_idx(env_ids) self.compute_observations() self.compute_reward(self.actions) if self.viewer is not None: self.gym.clear_lines(self.viewer) for i, env in enumerate(self.envs): self._add_circle_borderline(env, self.borderline_space - self.borderline_space_unit * self.progress_buf[ i].item()) def get_number_of_agents(self): # only train 1 agent return 1 def zero_actions(self) -> torch.Tensor: """Returns a buffer with zero actions. 
Returns: A buffer of zero torch actions """ actions = torch.zeros([self.num_envs * self.num_agents, self.num_actions], dtype=torch.float32, device=self.rl_device) self.extras['win'] = self.extras['lose'] = self.extras['draw'] = 0 return actions def clear_count(self): self.dense_reward_scale *= 0.9 self.extras['ranks'] = torch.zeros((self.num_agents, self.num_agents), device=self.device, dtype=torch.float) ##################################################################### ###=========================jit functions=========================### ##################################################################### @torch.jit.script def expand_env_ids(env_ids, n_agents): # type: (Tensor, int) -> Tensor device = env_ids.device # print(f'nanget:{n_agents}') agent_env_ids = torch.zeros((n_agents * len(env_ids)), device=device, dtype=torch.long) for idx in range(n_agents): agent_env_ids[idx::n_agents] = env_ids * n_agents + idx return agent_env_ids @torch.jit.script def compute_ant_reward( obs_buf, reset_buf, progress_buf, torques, now_rank, termination_height, max_episode_length, borderline_space, borderline_space_unit, win_reward_scale, stay_in_center_reward_scale, action_cost_scale, push_scale, joints_at_limit_cost_scale, dense_reward_scale, dt, num_agents ): # type: (Tensor, Tensor, Tensor,Tensor,Tensor,float,float,float,float,float,float,float,float,float,float,float,int) -> Tuple[Tensor, Tensor,Tensor,Tensor,Tensor,Tensor] obs = obs_buf.view(num_agents, -1, obs_buf.shape[1]) nxt_rank_val = num_agents - torch.count_nonzero(now_rank, dim=-1).view(-1, 1).repeat_interleave(num_agents, dim=-1) is_out = torch.sum(torch.square(obs[:, :, 0:2]), dim=-1) >= \ (borderline_space - progress_buf * borderline_space_unit).square() nxt_rank = torch.where((torch.transpose(is_out, 0, 1) > 0) & (now_rank == 0), nxt_rank_val, now_rank) # reset agents tmp_ones = torch.ones_like(reset_buf) reset = torch.where(is_out[0, :], tmp_ones, reset_buf) reset = torch.where(progress_buf >= 
max_episode_length - 1, tmp_ones, reset) reset = torch.where(torch.min(is_out[1:], dim=0).values, tmp_ones, reset) tmp_reset = reset.view(-1, 1).repeat_interleave(num_agents, dim=-1) nxt_rank = torch.where((tmp_reset == 1) & (nxt_rank == 0), nxt_rank_val - 1, nxt_rank) # compute metric logic tmp_reset = reset.view(1, -1).repeat_interleave(num_agents - 1, dim=0) tmp_zeros = torch.zeros_like(is_out[1:], dtype=torch.bool) wins = torch.ones_like(is_out[1:], dtype=torch.bool) loses = torch.ones_like(is_out[1:], dtype=torch.bool) draws = (progress_buf >= max_episode_length - 1).view(1, -1).repeat_interleave(num_agents - 1, dim=0) wins = torch.where(is_out[1:], wins & (tmp_reset == 1), tmp_zeros) draws = torch.where(is_out[1:] == 0, draws & (tmp_reset == 1), tmp_zeros) loses = torch.where(is_out[1:] == 0, loses & (tmp_reset == 1) & (draws == 0), tmp_zeros) sparse_reward = 1.0 * reset reward_per_rank = 2 * win_reward_scale / (num_agents - 1) sparse_reward = sparse_reward * (win_reward_scale - (nxt_rank[:, 0] - 1) * reward_per_rank) stay_in_center_reward = stay_in_center_reward_scale * torch.exp(-torch.linalg.norm(obs[0, :, :2], dim=-1)) dof_at_limit_cost = torch.sum(obs[0, :, 13:21] > 0.99, dim=-1) * joints_at_limit_cost_scale action_cost_penalty = torch.sum(torch.square(torques), dim=1) * action_cost_scale # print("torques:", torques[0, 2]) not_move_penalty = torch.exp(-torch.sum(torch.abs(torques), dim=1)) # print(f'action:...{action_cost_penalty.shape}') dense_reward = dof_at_limit_cost + action_cost_penalty + not_move_penalty + stay_in_center_reward total_reward = sparse_reward + dense_reward * dense_reward_scale return total_reward, reset, nxt_rank, wins.flatten(), loses.flatten(), draws.flatten() @torch.jit.script def compute_ant_observations( ant_agents_state, progress_buf, dof_limits_lower, dof_limits_upper, dof_vel_scale, termination_height, borderline_space_unit, borderline_space, num_agents, agent_idx, ): # type: 
(List[Tuple[Tensor,Tensor,Tensor]],Tensor,Tensor,Tensor,float,float,float,float,int,int)->Tensor # tot length:13+8+8+1+1+(num_agents-1)*(7+2+8+8+1) self_root_state, self_dof_pos, self_dof_vel = ant_agents_state[agent_idx] dof_pos_scaled = unscale(self_dof_pos, dof_limits_lower, dof_limits_upper) now_border_space = (borderline_space - progress_buf * borderline_space_unit).unsqueeze(-1) obs = torch.cat((self_root_state[:, :13], dof_pos_scaled, self_dof_vel * dof_vel_scale, now_border_space - torch.sqrt(torch.sum(self_root_state[:, :2].square(), dim=-1)).unsqueeze(-1), # dis to border now_border_space, torch.unsqueeze(self_root_state[:, 2] < termination_height, -1)), dim=-1) for op_idx in range(num_agents): if op_idx == agent_idx: continue op_root_state, op_dof_pos, op_dof_vel = ant_agents_state[op_idx] dof_pos_scaled = unscale(op_dof_pos, dof_limits_lower, dof_limits_upper) obs = torch.cat((obs, op_root_state[:, :7], self_root_state[:, :2] - op_root_state[:, :2], dof_pos_scaled, op_dof_vel * dof_vel_scale, now_border_space - torch.sqrt(torch.sum(op_root_state[:, :2].square(), dim=-1)).unsqueeze(-1), torch.unsqueeze(op_root_state[:, 2] < termination_height, -1)), dim=-1) # print(obs.shape) return obs @torch.jit.script def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor): return quat_mul(quat_from_angle_axis(rand0 * np.pi, x_unit_tensor), quat_from_angle_axis(rand1 * np.pi, y_unit_tensor)) ================================================ FILE: timechamber/tasks/ma_ant_sumo.py ================================================ from typing import Tuple import numpy as np import os import math import torch import random from isaacgym import gymtorch from isaacgym import gymapi from isaacgym.gymtorch import * # from torch.tensor import Tensor from timechamber.utils.torch_jit_utils import * from .base.vec_task import VecTask from .base.ma_vec_task import MA_VecTask # todo critic_state full obs class MA_Ant_Sumo(MA_VecTask): def __init__(self, cfg, sim_device, 
rl_device, graphics_device_id, headless, virtual_screen_capture, force_render): self.cfg = cfg self.randomization_params = self.cfg["task"]["randomization_params"] self.randomize = self.cfg["task"]["randomize"] self.max_episode_length = self.cfg["env"]["episodeLength"] self.termination_height = self.cfg["env"]["terminationHeight"] self.borderline_space = cfg["env"]["borderlineSpace"] self.plane_static_friction = self.cfg["env"]["plane"]["staticFriction"] self.plane_dynamic_friction = self.cfg["env"]["plane"]["dynamicFriction"] self.plane_restitution = self.cfg["env"]["plane"]["restitution"] self.action_scale = self.cfg["env"]["control"]["actionScale"] self.joints_at_limit_cost_scale = self.cfg["env"]["jointsAtLimitCost"] self.dof_vel_scale = self.cfg["env"]["dofVelocityScale"] self.draw_penalty_scale = -1000 self.win_reward_scale = 2000 self.move_to_op_reward_scale = 1. self.stay_in_center_reward_scale = 0.2 self.action_cost_scale = -0.000025 self.push_scale = 1. self.dense_reward_scale = 1. self.hp_decay_scale = 1. 
self.Kp = self.cfg["env"]["control"]["stiffness"] self.Kd = self.cfg["env"]["control"]["damping"] # see func: compute_ant_observations() for details # self.cfg["env"]["numObservations"] = 48 # dof pos(2) + dof vel(2) + dof action(2) + feet force sensor(force&torque, 6) self.cfg["env"][ "numObservations"] = 40 self.cfg["env"]["numActions"] = 8 self.cfg["env"]["numAgents"] = 2 self.use_central_value = False super().__init__(config=self.cfg, sim_device=sim_device, rl_device=rl_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render) if self.viewer is not None: for env in self.envs: self._add_circle_borderline(env) cam_pos = gymapi.Vec3(15.0, 0.0, 3.0) cam_target = gymapi.Vec3(10.0, 0.0, 0.0) self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target) # get gym GPU state tensors actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim) dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim) sensors_per_env = 4 self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs * self.num_agents, sensors_per_env * 6) self.gym.refresh_dof_state_tensor(self.sim) self.gym.refresh_actor_root_state_tensor(self.sim) self.root_states = gymtorch.wrap_tensor(actor_root_state) print(f'root_states:{self.root_states.shape}') self.initial_root_states = self.root_states.clone() self.initial_root_states[:, 7:13] = 0 # set lin_vel and ang_vel to 0 # create some wrapper tensors for different slices self.dof_state = gymtorch.wrap_tensor(dof_state_tensor) print(f"dof state shape: {self.dof_state.shape}") self.dof_pos = self.dof_state.view(self.num_envs, -1, 2)[:, :self.num_dof, 0] self.dof_pos_op = self.dof_state.view(self.num_envs, -1, 2)[:, self.num_dof:2 * self.num_dof, 0] self.dof_vel = self.dof_state.view(self.num_envs, -1, 2)[:, :self.num_dof, 1] self.dof_vel_op = self.dof_state.view(self.num_envs, 
-1, 2)[:, self.num_dof:2 * self.num_dof, 1] self.initial_dof_pos = torch.zeros_like(self.dof_pos, device=self.device, dtype=torch.float) zero_tensor = torch.tensor([0.0], device=self.device) self.initial_dof_pos = torch.where(self.dof_limits_lower > zero_tensor, self.dof_limits_lower, torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper, self.initial_dof_pos)) self.initial_dof_vel = torch.zeros_like(self.dof_vel, device=self.device, dtype=torch.float) self.dt = self.cfg["sim"]["dt"] torques = self.gym.acquire_dof_force_tensor(self.sim) self.torques = gymtorch.wrap_tensor(torques).view(self.num_envs, 2 * self.num_dof) self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1)) self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1)) self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((2 * self.num_envs, 1)) self.hp = torch.ones((self.num_envs,), device=self.device, dtype=torch.float32) * 100 self.hp_op = torch.ones((self.num_envs,), device=self.device, dtype=torch.float32) * 100 def allocate_buffers(self): self.obs_buf = torch.zeros((self.num_agents * self.num_envs, self.num_obs), device=self.device, dtype=torch.float) self.rew_buf = torch.zeros( self.num_envs, device=self.device, dtype=torch.float) self.reset_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long) self.timeout_buf = torch.zeros( self.num_envs, device=self.device, dtype=torch.long) self.progress_buf = torch.zeros( self.num_envs, device=self.device, dtype=torch.long) self.randomize_buf = torch.zeros( self.num_envs * self.num_agents, device=self.device, dtype=torch.long) self.extras = { 'win': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool), 'lose': torch.zeros(((self.num_agents - 1) * self.num_envs,), device=self.device, dtype=torch.bool), 'draw': torch.zeros(((self.num_agents - 1) * 
self.num_envs,), device=self.device, dtype=torch.bool)} def create_sim(self): self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, 'z') self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) self._create_ground_plane() print(f'num envs {self.num_envs} env spacing {self.cfg["env"]["envSpacing"]}') self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs))) # If randomizing, apply once immediately on startup before the fist sim step if self.randomize: self.apply_randomizations(self.randomization_params) def _add_circle_borderline(self, env): lines = [] borderline_height = 0.01 for height in range(20): for angle in range(360): begin_point = [np.cos(np.radians(angle)), np.sin(np.radians(angle)), borderline_height * height] end_point = [np.cos(np.radians(angle + 1)), np.sin(np.radians(angle + 1)), borderline_height * height] lines.append(begin_point) lines.append(end_point) lines = np.array(lines, dtype=np.float32) * self.borderline_space colors = np.array([[1, 0, 0]] * int(len(lines) / 2), dtype=np.float32) self.gym.add_lines(self.viewer, env, int(len(lines) / 2), lines, colors) def _create_ground_plane(self): plane_params = gymapi.PlaneParams() plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0) plane_params.static_friction = self.plane_static_friction plane_params.dynamic_friction = self.plane_dynamic_friction self.gym.add_ground(self.sim, plane_params) def _create_envs(self, num_envs, spacing, num_per_row): lower = gymapi.Vec3(-spacing, -spacing, 0.0) upper = gymapi.Vec3(spacing, spacing, spacing) asset_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../assets') asset_file = "mjcf/nv_ant.xml" if "asset" in self.cfg["env"]: asset_file = self.cfg["env"]["asset"].get("assetFileName", asset_file) asset_path = os.path.join(asset_root, asset_file) asset_root = os.path.dirname(asset_path) asset_file = os.path.basename(asset_path) asset_options = 
gymapi.AssetOptions() # Note - DOF mode is set in the MJCF file and loaded by Isaac Gym asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE asset_options.angular_damping = 0.0 ant_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) ant_asset_op = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) dof_props = self.gym.get_asset_dof_properties(ant_asset) self.num_dof = self.gym.get_asset_dof_count(ant_asset) self.num_bodies = self.gym.get_asset_rigid_body_count(ant_asset) # 9 = 4 x 2(front&back-end legs) + 1(torso) for i in range(self.num_dof): dof_props['driveMode'][i] = gymapi.DOF_MODE_POS dof_props['stiffness'][i] = self.Kp dof_props['damping'][i] = self.Kd box_pose = gymapi.Transform() box_pose.p = gymapi.Vec3(0, 0, 0) start_pose = gymapi.Transform() start_pose.p = gymapi.Vec3(-self.borderline_space + 1, -self.borderline_space + 1, 1.) start_pose_op = gymapi.Transform() start_pose_op.p = gymapi.Vec3(self.borderline_space - 1, self.borderline_space - 1, 1.) 
print(start_pose.p, start_pose_op.p) self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w], device=self.device) self.torso_index = 0 self.num_bodies = self.gym.get_asset_rigid_body_count(ant_asset) body_names = [self.gym.get_asset_rigid_body_name(ant_asset, i) for i in range(self.num_bodies)] extremity_names = [s for s in body_names if "foot" in s] self.extremities_index = torch.zeros(len(extremity_names), dtype=torch.long, device=self.device) # create force sensors attached to the "feet" extremity_indices = [self.gym.find_asset_rigid_body_index(ant_asset, name) for name in extremity_names] sensor_pose = gymapi.Transform() sensor_pose_op = gymapi.Transform() for body_idx in extremity_indices: self.gym.create_asset_force_sensor(ant_asset, body_idx, sensor_pose) self.gym.create_asset_force_sensor(ant_asset_op, body_idx, sensor_pose_op) self.ant_handles = [] self.actor_indices = [] self.actor_indices_op = [] self.actor_handles_op = [] self.envs = [] self.pos_before = torch.zeros(2, device=self.device) self.dof_limits_lower = [] self.dof_limits_upper = [] for i in range(self.num_envs): # create env instance env_ptr = self.gym.create_env( self.sim, lower, upper, num_per_row ) ant_handle = self.gym.create_actor(env_ptr, ant_asset, start_pose, "ant", i, -1, 0) actor_index = self.gym.get_actor_index(env_ptr, ant_handle, gymapi.DOMAIN_SIM) self.gym.set_actor_dof_properties(env_ptr, ant_handle, dof_props) self.actor_indices.append(actor_index) self.gym.enable_actor_dof_force_sensors(env_ptr, ant_handle) ant_handle_op = self.gym.create_actor(env_ptr, ant_asset_op, start_pose_op, "ant_op", i, -1, 0) actor_index_op = self.gym.get_actor_index(env_ptr, ant_handle_op, gymapi.DOMAIN_SIM) self.gym.set_actor_dof_properties(env_ptr, ant_handle_op, dof_props) self.actor_indices_op.append(actor_index_op) for j in range(self.num_bodies): self.gym.set_rigid_body_color( env_ptr, ant_handle, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.97, 0.38, 0.06)) 
self.gym.set_rigid_body_color( env_ptr, ant_handle_op, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.24, 0.38, 0.06)) self.envs.append(env_ptr) self.ant_handles.append(ant_handle) self.actor_handles_op.append(ant_handle_op) dof_prop = self.gym.get_actor_dof_properties(env_ptr, ant_handle) for j in range(self.num_dof): if dof_prop['lower'][j] > dof_prop['upper'][j]: self.dof_limits_lower.append(dof_prop['upper'][j]) self.dof_limits_upper.append(dof_prop['lower'][j]) else: self.dof_limits_lower.append(dof_prop['lower'][j]) self.dof_limits_upper.append(dof_prop['upper'][j]) self.dof_limits_lower = to_torch(self.dof_limits_lower, device=self.device) self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device) self.actor_indices = to_torch(self.actor_indices, dtype=torch.long, device=self.device) self.actor_indices_op = to_torch(self.actor_indices_op, dtype=torch.long, device=self.device) for i in range(len(extremity_names)): self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0], extremity_names[i]) def compute_reward(self, actions): self.rew_buf[:], self.reset_buf[:], self.hp[:], self.hp_op[:], \ self.extras['win'], self.extras['lose'], self.extras['draw'] = compute_ant_reward( self.obs_buf[:self.num_envs], self.obs_buf[self.num_envs:], self.reset_buf, self.progress_buf, self.pos_before, self.torques[:, :self.num_dof], self.hp, self.hp_op, self.termination_height, self.max_episode_length, self.borderline_space, self.draw_penalty_scale, self.win_reward_scale, self.move_to_op_reward_scale, self.stay_in_center_reward_scale, self.action_cost_scale, self.push_scale, self.joints_at_limit_cost_scale, self.dense_reward_scale, self.hp_decay_scale, self.dt, ) def compute_observations(self): self.gym.refresh_dof_state_tensor(self.sim) self.gym.refresh_actor_root_state_tensor(self.sim) self.gym.refresh_force_sensor_tensor(self.sim) self.gym.refresh_dof_force_tensor(self.sim) self.obs_buf[:self.num_envs] = \ 
compute_ant_observations(
        self.root_states[0::2], self.root_states[1::2], self.dof_pos, self.dof_vel,
        self.dof_limits_lower, self.dof_limits_upper, self.dof_vel_scale,
        self.termination_height)
        # Opponent observations: the same function with the two agents' roles
        # swapped (even root-state rows are the ego agent, odd rows the opponent).
        self.obs_buf[self.num_envs:] = compute_ant_observations(
            self.root_states[1::2], self.root_states[0::2], self.dof_pos_op, self.dof_vel_op,
            self.dof_limits_lower, self.dof_limits_upper, self.dof_vel_scale,
            self.termination_height)

    def reset_idx(self, env_ids):
        """Reset the given environments to randomized start states.

        Both agents of an env receive the same jittered DOF state; their root
        positions are placed at opposite angles around the arena center and
        their yaws are randomized.

        Args:
            env_ids: 1-D tensor of environment indices to reset.
        """
        # print('reset.....', env_ids)
        # Randomization can happen only at reset time, since it can reset actor positions on GPU
        if self.randomize:
            self.apply_randomizations(self.randomization_params)

        # Small uniform jitter around the initial DOF pose / zero velocity.
        positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=self.device)
        velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=self.device)

        self.dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions,
                                             self.dof_limits_lower, self.dof_limits_upper)
        self.dof_vel[env_ids] = velocities
        # NOTE(review): the opponent reuses the same sampled jitter tensors,
        # so both agents start from identical DOF states.
        self.dof_pos_op[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions,
                                                self.dof_limits_lower, self.dof_limits_upper)
        self.dof_vel_op[env_ids] = velocities

        env_ids_int32 = (torch.cat((self.actor_indices[env_ids],
                                    self.actor_indices_op[env_ids]))).to(dtype=torch.int32)
        agent_env_ids = expand_env_ids(env_ids, 2)

        # Spawn the two agents at a random radius (below borderline_space) and
        # at opposite angles (rand_angle vs. rand_angle + pi) around the center.
        rand_angle = torch.rand((len(env_ids),), device=self.device) * torch.pi * 2
        rand_pos = torch.ones((len(agent_env_ids), 2), device=self.device) * (
                self.borderline_space * torch.ones((len(agent_env_ids), 2), device=self.device) - torch.rand(
            (len(agent_env_ids), 2), device=self.device) * 2)
        rand_pos[0::2, 0] *= torch.cos(rand_angle)
        rand_pos[0::2, 1] *= torch.sin(rand_angle)
        rand_pos[1::2, 0] *= torch.cos(rand_angle + torch.pi)
        rand_pos[1::2, 1] *= torch.sin(rand_angle + torch.pi)

        # Independent random yaw (rotation about z) for each agent.
        rand_floats = torch_rand_float(-1.0, 1.0, (len(agent_env_ids), 3), device=self.device)
        rand_rotation = quat_from_angle_axis(rand_floats[:, 1] * np.pi, self.z_unit_tensor[agent_env_ids])
        rand_rotation2 = quat_from_angle_axis(rand_floats[:, 2] * np.pi, self.z_unit_tensor[agent_env_ids])
        self.root_states[agent_env_ids] = self.initial_root_states[agent_env_ids]
        self.root_states[agent_env_ids, :2] = rand_pos
        self.root_states[agent_env_ids[1::2], 3:7] = rand_rotation[1::2]
        self.root_states[agent_env_ids[0::2], 3:7] = rand_rotation2[0::2]

        # Push the new root/DOF states to the simulator for the reset actors only.
        self.gym.set_actor_root_state_tensor_indexed(self.sim,
                                                     gymtorch.unwrap_tensor(self.root_states),
                                                     gymtorch.unwrap_tensor(env_ids_int32),
                                                     len(env_ids_int32))
        self.gym.set_dof_state_tensor_indexed(self.sim,
                                              gymtorch.unwrap_tensor(self.dof_state),
                                              gymtorch.unwrap_tensor(env_ids_int32),
                                              len(env_ids_int32))
        self.pos_before = self.root_states[0::2, :2].clone()
        self.progress_buf[env_ids] = 0
        self.reset_buf[env_ids] = 0

    def pre_physics_step(self, actions):
        # actions.shape = [num_envs * num_agents, num_actions], stacked as followed:
        # {[(agent1_act_1, agent1_act2)|(agent2_act1, agent2_act2)|...]_(env0),
        #  [(agent1_act_1, agent1_act2)|(agent2_act1, agent2_act2)|...]_(env1),
        #  ... }
        self.actions = actions.clone().to(self.device)
        self.actions = torch.cat((self.actions[:self.num_envs], self.actions[self.num_envs:]),
                                 dim=-1)
        # reshape [num_envs * num_agents, num_actions] to [num_envs, num_agents * num_actions]
        targets = self.actions
        self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(targets))

    def post_physics_step(self):
        self.progress_buf += 1
        self.randomize_buf += 1

        self.compute_observations()
        self.compute_reward(self.actions)
        # Cache the ego root xy (first two observation entries) for the next
        # step's movement reward.
        self.pos_before = self.obs_buf[:self.num_envs, :2].clone()

    def get_number_of_agents(self):
        # train one agent with index 0
        return 1

    def zero_actions(self) -> torch.Tensor:
        """Returns a buffer with zero actions.
        Returns:
            A buffer of zero torch actions
        """
        actions = torch.zeros([self.num_envs * self.num_agents, self.num_actions],
                              dtype=torch.float32, device=self.rl_device)
        return actions

    def clear_count(self):
        # Decay the dense-reward weighting and clear the win/draw counters.
        self.dense_reward_scale *= 0.9
        self.extras['win'][:] = 0
        self.extras['draw'][:] = 0


#####################################################################
###=========================jit functions=========================###
#####################################################################


@torch.jit.script
def expand_env_ids(env_ids, n_agents):
    # type: (Tensor, int) -> Tensor
    # Map per-env indices to per-agent actor indices: env i -> i*n_agents + k
    # for k in [0, n_agents).
    device = env_ids.device
    agent_env_ids = torch.zeros((n_agents * len(env_ids)), device=device, dtype=torch.long)
    for idx in range(n_agents):
        agent_env_ids[idx::n_agents] = env_ids * n_agents + idx
    return agent_env_ids


@torch.jit.script
def compute_move_reward(
        pos,
        pos_before,
        target,
        dt,
        move_to_op_reward_scale
):
    # type: (Tensor,Tensor,Tensor,float,float) -> Tensor
    # Reward the component of the agent's velocity that points toward
    # `target` (the opponent); negative components are clipped to zero.
    move_vec = (pos - pos_before) / dt
    direction = target - pos_before
    direction = torch.div(direction, torch.linalg.norm(direction, dim=-1).view(-1, 1))
    s = torch.sum(move_vec * direction, dim=-1)
    return torch.maximum(s, torch.zeros_like(s)) * move_to_op_reward_scale


@torch.jit.script
def compute_ant_reward(
        obs_buf,
        obs_buf_op,
        reset_buf,
        progress_buf,
        pos_before,
        torques,
        hp,
        hp_op,
        termination_height,
        max_episode_length,
        borderline_space,
        draw_penalty_scale,
        win_reward_scale,
        move_to_op_reward_scale,
        stay_in_center_reward_scale,
        action_cost_scale,
        push_scale,
        joints_at_limit_cost_scale,
        dense_reward_scale,
        hp_decay_scale,
        dt,
):
    # type: (Tensor, Tensor, Tensor, Tensor,Tensor,Tensor,Tensor,Tensor,float, float,float, float,float,float,float,float,float,float,float,float,float) -> Tuple[Tensor, Tensor,Tensor,Tensor,Tensor,Tensor,Tensor]
    # An agent loses hp while its torso height (obs column 2) is below the
    # termination height.
    hp -= (obs_buf[:, 2] < termination_height) * hp_decay_scale
    hp_op -= (obs_buf_op[:, 2] < termination_height) * hp_decay_scale
    # Out of the arena: root xy outside the circle of radius borderline_space.
    is_out = torch.sum(torch.square(obs_buf[:, 0:2]), dim=-1) >=
borderline_space ** 2
    is_out_op = torch.sum(torch.square(obs_buf_op[:, 0:2]), dim=-1) >= borderline_space ** 2
    # An agent also counts as "out" once its hp is exhausted.
    is_out = is_out | (hp <= 0)
    is_out_op = is_out_op | (hp_op <= 0)
    # reset agents
    tmp_ones = torch.ones_like(reset_buf)
    reset = torch.where(is_out, tmp_ones, reset_buf)
    reset = torch.where(is_out_op, tmp_ones, reset)
    reset = torch.where(progress_buf >= max_episode_length - 1, tmp_ones, reset)
    # Refill both hp pools to 100 for environments being reset.
    hp = torch.where(reset > 0, tmp_ones * 100., hp)
    hp_op = torch.where(reset > 0, tmp_ones * 100., hp_op)

    # Sparse outcome terms: win when the opponent is out, symmetric lose
    # penalty, and a draw penalty at the episode-length cap.
    win_reward = win_reward_scale * is_out_op
    lose_penalty = -win_reward_scale * is_out
    draw_penalty = torch.where(progress_buf >= max_episode_length - 1, tmp_ones * draw_penalty_scale,
                               torch.zeros_like(reset, dtype=torch.float))

    # Dense shaping terms.
    move_reward = compute_move_reward(obs_buf[:, 0:2], pos_before,
                                      obs_buf_op[:, 0:2], dt,
                                      move_to_op_reward_scale)
    # stay_in_center_reward = stay_in_center_reward_scale * torch.exp(-torch.linalg.norm(obs_buf[:, :2], dim=-1))
    # Columns 13:21 hold the unscaled DOF positions (root state occupies the
    # first 13); values near 1 mean a joint is at its limit — TODO confirm slice.
    dof_at_limit_cost = torch.sum(obs_buf[:, 13:21] > 0.99, dim=-1) * joints_at_limit_cost_scale
    # Reward pushing the opponent away from the arena center.
    push_reward = -push_scale * torch.exp(-torch.linalg.norm(obs_buf_op[:, :2], dim=-1))
    action_cost_penalty = torch.sum(torch.square(torques), dim=1) * action_cost_scale
    # Penalize standing still (near-zero total torque).
    not_move_penalty = -10 * torch.exp(-torch.sum(torch.abs(torques), dim=1))
    dense_reward = move_reward + dof_at_limit_cost + push_reward + action_cost_penalty + not_move_penalty
    total_reward = win_reward + lose_penalty + draw_penalty + dense_reward * dense_reward_scale

    return total_reward, reset, hp, hp_op, is_out_op, is_out, progress_buf >= max_episode_length - 1


@torch.jit.script
def compute_ant_observations(
        root_states,
        root_states_op,
        dof_pos,
        dof_vel,
        dof_limits_lower,
        dof_limits_upper,
        dof_vel_scale,
        termination_height
):
    # type: (Tensor,Tensor,Tensor,Tensor,Tensor,Tensor,float,float)->Tensor
    # Observation layout: ego root state (13), normalized DOF positions,
    # scaled DOF velocities, opponent root pose (7), relative xy offset, and
    # two below-termination-height flags.
    dof_pos_scaled = unscale(dof_pos, dof_limits_lower, dof_limits_upper)
    obs = torch.cat(
        (root_states[:, :13], dof_pos_scaled, dof_vel * dof_vel_scale,
         root_states_op[:, :7],
         root_states[:, :2] - root_states_op[:, :2],
         torch.unsqueeze(root_states[:, 2] < termination_height, -1),
         torch.unsqueeze(root_states_op[:, 2] < termination_height, -1)),
        dim=-1)
    return obs


@torch.jit.script
def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor):
    # Compose two random rotations about the given x and y unit axes
    # (angles are rand0*pi and rand1*pi).
    return quat_mul(quat_from_angle_axis(rand0 * np.pi, x_unit_tensor),
                    quat_from_angle_axis(rand1 * np.pi, y_unit_tensor))


================================================
FILE: timechamber/tasks/ma_humanoid_strike.py
================================================
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from asyncio import shield
from dis import dis
import torch
import math

from isaacgym import gymapi, gymtorch
from isaacgym.torch_utils import *

import timechamber.tasks.ase_humanoid_base.humanoid_amp_task as humanoid_amp_task
from timechamber.utils import torch_utils


class HumanoidStrike(humanoid_amp_task.HumanoidAMPTask):
    """Two-humanoid sword-and-shield strike task on the AMP humanoid base."""

    def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
        super().__init__(cfg=cfg,
                         sim_params=sim_params,
                         physics_engine=physics_engine,
                         device_type=device_type,
                         device_id=device_id,
                         headless=headless)
        # Per-env accumulated contact-force "damage" dealt in each direction;
        # used to decide win/lose/draw at episode end.
        self.ego_to_op_damage = torch.zeros_like(self.reset_buf, device=self.device, dtype=torch.float)
        self.op_to_ego_damage = torch.zeros_like(self.reset_buf, device=self.device, dtype=torch.float)
        self._prev_root_pos = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float)
        self._prev_root_pos_op = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float)
        # Previous-step rigid-body velocities, snapshotted in post_physics_step
        # and consumed by the acceleration-based reward terms.
        self._prev_body_ang_vel = torch.zeros([self.num_envs, self.num_bodies, 3], device=self.device,
                                              dtype=torch.float32)
        self._prev_body_vel = torch.zeros([self.num_envs, self.num_bodies, 3], device=self.device,
                                          dtype=torch.float32)
        # Bodies that deal strikes and bodies whose contact forces count as damage.
        strike_body_names = cfg["env"]["strikeBodyNames"]
        self._strike_body_ids = self._build_body_ids_tensor(self.envs[0], self.humanoid_handles[0],
                                                            strike_body_names)
        force_body_names = cfg["env"]["forceBodies"]
        self._force_body_ids = self._build_body_ids_tensor(self.envs[0], self.humanoid_handles[0],
                                                            force_body_names)
        if self.viewer != None:
            # Draw the arena border and aim the camera at the fight area.
            for env in self.envs:
                self._add_rectangle_borderline(env)
            cam_pos = gymapi.Vec3(15.0, 0.0, 3.0)
            cam_target = gymapi.Vec3(10.0, 0.0, 0.0)
            self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)
        ###### Reward Definition ######
        ###### Reward Definition ######
        return

    def get_task_obs_size(self):
        # Width of the task-specific observation appended by _compute_task_obs.
        obs_size = 0
        if (self._enable_task_obs):
            obs_size = 50
        return obs_size

    def _create_envs(self, num_envs, spacing, num_per_row):
        super()._create_envs(num_envs, spacing, num_per_row)
        return

    def _build_env(self, env_id, env_ptr, humanoid_asset, humanoid_asset_op):
        super()._build_env(env_id, env_ptr, humanoid_asset, humanoid_asset_op)
        return

    def _build_body_ids_tensor(self, env_ptr, actor_handle, body_names):
        """Resolve rigid-body names to a long tensor of body indices.

        NOTE(review): the passed env_ptr/actor_handle are immediately
        overwritten with env 0's handles, so ids are always resolved on env 0.
        """
        env_ptr = self.envs[0]
        actor_handle = self.humanoid_handles[0]
        body_ids = []
        for body_name in body_names:
            body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name)
            assert (body_id != -1)
            body_ids.append(body_id)
        body_ids = to_torch(body_ids, device=self.device, dtype=torch.long)
        return body_ids

    def _reset_actors(self, env_ids):
        # Jitter the DOF state around the initial pose; both agents reuse the
        # same sampled noise.
        positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=self.device)
        velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=self.device)
        self._dof_pos[env_ids] = tensor_clamp(self._initial_dof_pos[env_ids] + positions,
                                              self.dof_limits_lower, self.dof_limits_upper)
        self._dof_vel[env_ids] = velocities
        self._dof_pos_op[env_ids] = tensor_clamp(self._initial_dof_pos[env_ids] + positions,
                                                 self.dof_limits_lower, self.dof_limits_upper)
        self._dof_vel_op[env_ids] = velocities
        agent_env_ids = expand_env_ids(env_ids, 2)
        # Spawn the two agents at opposite angles at a random radius.
        rand_angle = torch.rand((len(env_ids),), device=self.device) * math.pi * 2
        rand_pos = torch.ones((len(agent_env_ids), 2), device=self.device) * (
                self.borderline_space * torch.ones((len(agent_env_ids), 2), device=self.device) - torch.rand(
            (len(agent_env_ids), 2), device=self.device) * 2)
        rand_pos[0::2, 0] *=
torch.cos(rand_angle)
        rand_pos[0::2, 1] *= torch.sin(rand_angle)
        rand_pos[1::2, 0] *= torch.cos(rand_angle + math.pi)
        rand_pos[1::2, 1] *= torch.sin(rand_angle + math.pi)
        # Independent random yaw for each agent.
        rand_floats = torch_rand_float(-1.0, 1.0, (len(agent_env_ids), 3), device=self.device)
        rand_rotation = quat_from_angle_axis(rand_floats[:, 1] * np.pi, self.z_unit_tensor[agent_env_ids])
        rand_rotation2 = quat_from_angle_axis(rand_floats[:, 2] * np.pi, self.z_unit_tensor[agent_env_ids])
        self._humanoid_root_states[agent_env_ids] = self._initial_humanoid_root_states[agent_env_ids]
        self._humanoid_root_states[agent_env_ids, :2] = rand_pos
        self._humanoid_root_states[agent_env_ids[1::2], 3:7] = rand_rotation[1::2]
        self._humanoid_root_states[agent_env_ids[0::2], 3:7] = rand_rotation2[0::2]
        return

    def _reset_env_tensors(self, env_ids):
        super()._reset_env_tensors(env_ids)
        # Fresh episodes start with no accumulated damage.
        self.ego_to_op_damage[env_ids] = 0
        self.op_to_ego_damage[env_ids] = 0
        return

    def pre_physics_step(self, actions):
        super().pre_physics_step(actions)
        # self._prev_root_pos[:] = self._humanoid_root_states[self.humanoid_indices, 0:3]
        # self._prev_root_pos_op[:] = self._humanoid_root_states[self.humanoid_indices_op, 0:3]
        # self._prev_body_ang_vel[:] = self._rigid_body_ang_vel[]
        return

    def post_physics_step(self):
        super().post_physics_step()
        # Snapshot this step's body velocities for next step's acceleration-based
        # reward terms.
        self._prev_body_ang_vel[:] = self._rigid_body_ang_vel[:]
        self._prev_body_vel[:] = self._rigid_body_vel[:]

    def _compute_observations(self):
        # First half of obs_buf holds the ego agent, second half the opponent.
        obs, obs_op = self._compute_humanoid_obs()

        if (self._enable_task_obs):
            task_obs, task_obs_op = self._compute_task_obs()
            obs = torch.cat([obs, task_obs], dim=-1)
            obs_op = torch.cat([obs_op, task_obs_op], dim=-1)

        self.obs_buf[:self.num_envs] = obs
        self.obs_buf[self.num_envs:] = obs_op
        return

    def _compute_task_obs(self):
        body_pos = self._rigid_body_pos
        body_rot = self._rigid_body_rot
        body_vel = self._rigid_body_vel
        body_pos_op = self._rigid_body_pos_op
        body_rot_op = self._rigid_body_rot_op
        body_vel_op = self._rigid_body_vel_op
        # num_envs, 13
        root_states =
self._humanoid_root_states[self.humanoid_indices]
        root_states_op = self._humanoid_root_states[self.humanoid_indices_op]
        # Symmetric task observations: the same jit function with the two
        # agents' roles swapped.
        obs = compute_strike_observations(root_states, root_states_op,
                                          body_pos, body_rot,
                                          body_pos_op, body_vel_op,
                                          borderline=self.borderline_space)
        obs_op = compute_strike_observations(root_states=root_states_op,
                                             root_states_op=root_states,
                                             body_pos=body_pos_op,
                                             body_rot=body_rot_op,
                                             body_pos_op=body_pos,
                                             body_vel_op=body_vel,
                                             borderline=self.borderline_space)
        return obs, obs_op

    def _compute_reward(self, actions):
        root_states = self._humanoid_root_states[self.humanoid_indices]
        root_states_op = self._humanoid_root_states[self.humanoid_indices_op]
        body_pos = self._rigid_body_pos
        body_vel = self._rigid_body_vel
        prev_body_vel = self._prev_body_vel
        body_ang_vel = self._rigid_body_ang_vel
        prev_body_ang_vel = self._prev_body_ang_vel
        contact_force = self._contact_forces
        body_pos_op = self._rigid_body_pos_op
        contact_force_op = self._contact_forces_op
        self.rew_buf[:], force_ego_to_op, force_op_to_ego = compute_strike_reward(
            root_states=root_states,
            root_states_op=root_states_op,
            body_pos=body_pos,
            body_ang_vel=body_ang_vel,
            prev_body_ang_vel=prev_body_ang_vel,
            body_vel=body_vel,
            prev_body_vel=prev_body_vel,
            body_pos_op=body_pos_op,
            force_body_ids=self._force_body_ids,
            strike_body_ids=self._strike_body_ids,
            contact_force=contact_force,
            contact_force_op=contact_force_op,
            contact_body_ids=self._contact_body_ids,
            borderline=self.borderline_space,
            termination_heights=self._termination_heights,
            dt=self.dt)
        # Accumulate per-step impact forces into the episode damage totals used
        # for the win/lose/draw decision.
        self.ego_to_op_damage += force_ego_to_op
        self.op_to_ego_damage += force_op_to_ego
        return

    def _compute_reset(self):
        self.reset_buf[:], self._terminate_buf[:], \
            self.extras['win'], self.extras['lose'], self.extras['draw'] = \
            compute_humanoid_reset(self.reset_buf, self.progress_buf,
                                   self.ego_to_op_damage, self.op_to_ego_damage,
                                   self._contact_forces, self._contact_forces_op,
                                   self._contact_body_ids,
                                   self._rigid_body_pos, self._rigid_body_pos_op,
                                   self.max_episode_length, self._enable_early_termination,
                                   self._termination_heights, self.borderline_space)
        return


#####################################################################
###=========================jit functions=========================###
#####################################################################


@torch.jit.script
def compute_strike_observations(root_states, root_states_op, body_pos, body_rot, body_pos_op, body_vel_op,
                                borderline,
                                ):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor,Tensor,float) -> Tensor
    # Task observation for one agent: distances to the square arena border,
    # the opponent's pose/velocity in the ego heading frame, and relative
    # sword/shield/body positions.  Hard-coded body indices (6 = sword,
    # 9 = shield, 1 = torso, 2 = head, ...) presumably follow the
    # sword-and-shield humanoid asset's body ordering — TODO confirm against
    # the MJCF.
    root_pos = root_states[:, 0:3]
    root_rot = root_states[:, 3:7]
    ego_sword_pos = body_pos[:, 6, :]
    ego_sword_rot = body_rot[:, 6, :]
    ego_shield_pos = body_pos[:, 9, :]
    ego_shield_rot = body_rot[:, 9, :]

    root_pos_op = root_states_op[:, 0:3]
    root_rot_op = root_states_op[:, 3:7]
    root_vel_op = root_states_op[:, 7:10]
    root_ang_op = root_states_op[:, 10:13]
    op_sword_pos = body_pos_op[:, 6, :]
    op_sword_vel = body_vel_op[:, 6, :]
    op_torso_pos = body_pos_op[:, 1, :]
    op_torso_vel = body_vel_op[:, 1, :]
    op_head_pos = body_pos_op[:, 2, :]
    op_head_vel = body_vel_op[:, 2, :]
    op_right_upper_arm_pos = body_pos_op[:, 3, :]
    op_right_thigh_pos = body_pos_op[:, 11, :]
    op_left_thigh_pos = body_pos_op[:, 14, :]
    ##*******************************************************##
    # Distance to the nearest border along each axis (square arena of
    # half-width `borderline`); negative values mean out of bounds.
    relative_x_1 = borderline - root_pos[:, 0]
    relative_x_2 = root_pos[:, 0] + borderline
    relative_x = torch.minimum(relative_x_1, relative_x_2)
    relative_x = torch.unsqueeze(relative_x, -1)
    relative_y_1 = borderline - root_pos[:, 1]
    relative_y_2 = root_pos[:, 1] + borderline
    relative_y = torch.minimum(relative_y_1, relative_y_2)
    relative_y = torch.unsqueeze(relative_y, -1)
    ##*******************************************************##
    # Inverse heading rotations used to express quantities in local frames.
    heading_rot = torch_utils.calc_heading_quat_inv(root_rot)
    sword_rot = torch_utils.calc_heading_quat_inv(ego_sword_rot)
    shield_rot = torch_utils.calc_heading_quat_inv(ego_shield_rot)
    local_op_relative_pos = root_pos_op - root_pos
    local_op_relative_pos[..., -1] =
root_pos_op[..., -1]
    local_op_relative_pos = quat_rotate(heading_rot, local_op_relative_pos)
    local_op_vel = quat_rotate(heading_rot, root_vel_op)
    local_op_ang_vel = quat_rotate(heading_rot, root_ang_op)
    local_op_rot = quat_mul(heading_rot, root_rot_op)
    local_op_rot_obs = torch_utils.quat_to_tan_norm(local_op_rot)
    ##*******************************************************##
    # op sword relative ego position and vel
    local_op_relative_sword_pos = op_sword_pos - root_pos
    local_op_relative_sword_pos = quat_rotate(heading_rot, local_op_relative_sword_pos)
    local_op_sword_vel = quat_rotate(heading_rot, op_sword_vel)
    # op sword relative ego shield position and vel
    local_op_sword_shield_pos = op_sword_pos - ego_shield_pos
    local_op_sword_shield_pos = quat_rotate(shield_rot, local_op_sword_shield_pos)
    local_op_sword_shield_vel = quat_rotate(shield_rot, op_sword_vel)
    # relative position and vel of ego sword and op up body
    relative_sword_torso_pos = op_torso_pos - ego_sword_pos
    relative_sword_torso_pos = quat_rotate(sword_rot, relative_sword_torso_pos)
    relative_sword_torso_vel = quat_rotate(sword_rot, op_torso_vel)
    relative_sword_head_pos = op_head_pos - ego_sword_pos
    relative_sword_head_pos = quat_rotate(sword_rot, relative_sword_head_pos)
    relative_sword_head_vel = quat_rotate(sword_rot, op_head_vel)
    relative_sword_right_arm_pos = op_right_upper_arm_pos - ego_sword_pos
    relative_sword_right_arm_pos = quat_rotate(sword_rot, relative_sword_right_arm_pos)
    relative_sword_right_thigh_pos = op_right_thigh_pos - ego_sword_pos
    relative_sword_right_thigh_pos = quat_rotate(sword_rot, relative_sword_right_thigh_pos)
    relative_sword_left_thigh_pos = op_left_thigh_pos - ego_sword_pos
    relative_sword_left_thigh_pos = quat_rotate(sword_rot, relative_sword_left_thigh_pos)
    # Concatenate all task features into a single per-env observation row.
    obs = torch.cat([relative_x, relative_y, local_op_relative_pos, local_op_rot_obs,
                     local_op_vel, local_op_ang_vel,
                     local_op_relative_sword_pos, local_op_sword_vel,
                     local_op_sword_shield_pos, local_op_sword_shield_vel,
                     relative_sword_torso_pos, relative_sword_torso_vel,
                     relative_sword_head_pos, relative_sword_head_vel,
                     relative_sword_right_arm_pos,
                     relative_sword_right_thigh_pos,
                     relative_sword_left_thigh_pos
                     ], dim=-1)
    return obs


@torch.jit.script
def compute_strike_reward(root_states, root_states_op, body_pos, body_ang_vel, prev_body_ang_vel, body_vel,
                          prev_body_vel, body_pos_op, force_body_ids, strike_body_ids, contact_force,
                          contact_force_op, contact_body_ids, borderline, termination_heights, dt):
    # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor,Tensor,Tensor,Tensor,float, Tensor, float) -> Tuple[Tensor, Tensor,Tensor]
    # Per-term reward weights (see the weighted sum at the end).
    op_fall_reward_w = 200.0
    ego_fall_out_reward_w = 50.0
    shield_to_sword_pos_reward_w = 1.0
    damage_reward_w = 8.0
    sword_to_op_reward_w = 0.8
    reward_energy_w = 3.0
    reward_strike_vel_acc_w = 3.0
    reward_face_w = 4.0
    reward_foot_to_op_w = 10.0
    reward_kick_w = 2.0
    num_envs = root_states.shape[0]
    reward = torch.zeros((num_envs, 1), dtype=torch.float32)
    root_xy_pos = root_states[:, 0:2]
    root_pos = root_states[:, 0:3]
    # Hard-coded body indices (6 = sword, 9 = shield, 13 = right foot, ...)
    # presumably follow the sword-and-shield asset — TODO confirm.
    ego_sword_pos = body_pos[:, 6, 0:3]
    ego_shield_pos = body_pos[:, 9, 0:3]
    ego_right_foot_pos = body_pos[:, 13, 0:3]
    op_sword_pos = body_pos_op[:, 6, 0:3]
    op_torse_pos = body_pos_op[:, 1, 0:3]
    op_right_thigh_pos = body_pos_op[:, 11, 0:3]
    op_left_thigh_pos = body_pos_op[:, 14, 0:3]
    root_pos_xy_op = root_states_op[:, 0:2]
    root_pos_xy = root_states[:, 0:2]
    root_pos_op = root_states_op[:, 0:3]
    root_rot = root_states[:, 3:7]
    root_rot_op = root_states_op[:, 3:7]
    # World up-vector, used below to measure how upright the opponent is.
    up = torch.zeros_like(root_pos_op)
    up[..., -1] = 1
    contact_buf = contact_force.clone()
    contact_buf_op = contact_force_op.clone()
    ##*****************r energy******************##
    # Reward swinging the strike bodies fast while close to the opponent.
    strike_body_vel = body_vel[:, strike_body_ids, :]
    strike_body_vel_norm = torch.sum(torch.norm(strike_body_vel, dim=-1), dim=1)
    strike_body_vel_norm = torch.clamp(strike_body_vel_norm, max=20)
    distance = root_pos_xy_op - root_xy_pos
    distance = torch.norm(distance, dim=-1)
    zeros =
torch.zeros_like(distance)
    # k_dist ~ 1 within 2 m of the opponent, decays fast beyond that; it gates
    # the energy and acceleration terms on proximity.
    k_dist = torch.exp(-10 * torch.maximum(zeros, distance - 2.0))
    r_energy = k_dist * strike_body_vel_norm
    r_energy = r_energy / 20.
    # Strike-body acceleration (finite difference of velocity over dt).
    strike_vel_dfff = body_vel[:, strike_body_ids, :] - prev_body_vel[:, strike_body_ids, :]
    strike_vel_acc = strike_vel_dfff / dt
    strike_vel_acc = torch.sum(torch.norm(strike_vel_acc, dim=-1), dim=1)
    strike_vel_acc = torch.clamp(strike_vel_acc, max=1000)
    strike_vel_acc = k_dist * strike_vel_acc / 500
    r_strike_vel_acc = strike_vel_acc
    ##*****************r damage******************##
    # Net impact force on the force bodies: force dealt minus doubled force
    # received, clamped below and rescaled.
    ego_to_op_force = contact_buf_op[:, force_body_ids, :]
    op_to_ego_force = contact_buf[:, force_body_ids, :]
    force_ego_to_op = torch.norm(ego_to_op_force, dim=2).sum(dim=1)
    force_op_to_ego = torch.norm(op_to_ego_force, dim=2).sum(dim=1)
    r_damage = force_ego_to_op - force_op_to_ego * 2
    r_damage = torch.clamp(r_damage, min=-200.)
    r_damage /= 100
    ##*****************r kick******************##
    # Encourage bringing the right foot close to the opponent's torso, with a
    # full reward once within 0.1 m.
    ego_foot_op_torse_distance = op_torse_pos - ego_right_foot_pos
    ego_foot_op_torse_err = torch.norm(ego_foot_op_torse_distance, dim=-1)
    succ_foot = ego_foot_op_torse_err < 0.1
    r_foot_to_op = torch.exp(-0.5 * ego_foot_op_torse_err)
    constant_r = torch.ones_like(r_foot_to_op)
    r_foot_to_op = torch.where(succ_foot, constant_r, r_foot_to_op)
    # Reward lifting the right foot (capped at 1 once above 0.4 m).
    foot_height = ego_right_foot_pos[..., 2]
    succ_kick = foot_height >= 0.4
    zeros = torch.zeros_like(succ_kick)
    constant_r_kick = torch.ones_like(succ_kick)
    r_kick = torch.where(succ_kick, constant_r_kick, foot_height)
    ##*****************r close******************##
    # sword -> torso
    pos_err_scale1 = 1.0
    pos_err_scale2 = 2.0
    sword_torse_distance = op_torse_pos - ego_sword_pos
    sword_torse_err = torch.sum(sword_torse_distance * sword_torse_distance, dim=-1)
    sword_right_thigh_distance = op_right_thigh_pos - ego_sword_pos
    sword_right_thigh_err = torch.sum(sword_right_thigh_distance * sword_right_thigh_distance, dim=-1)
    sword_left_thigh_distance = op_left_thigh_pos - ego_sword_pos
    sword_left_thigh_err =
torch.sum(sword_left_thigh_distance * sword_left_thigh_distance, dim=-1)
    sword_sword_distance = op_sword_pos - ego_sword_pos
    sword_sword_err = torch.sum(sword_sword_distance * sword_sword_distance, dim=-1)
    # zeros = torch.zeros_like(sword_torse_distance)
    # Gaussian-shaped closeness rewards for sword vs. several opponent bodies.
    r_close = torch.exp(-pos_err_scale1 * sword_torse_err)  # -> [0, 1]
    r_close += torch.exp(-pos_err_scale1 * sword_right_thigh_err)
    r_close += torch.exp(-pos_err_scale1 * sword_left_thigh_err)
    r_close += torch.exp(-pos_err_scale2 * sword_sword_err)
    ##*****************r shelid with op sword******************##
    # Keep the ego shield close to the opponent's sword (blocking posture).
    pos_err_scale3 = 2.0
    ego_shield_op_sword_distance = op_sword_pos - ego_shield_pos
    ego_shield_op_sword_err = torch.sum(ego_shield_op_sword_distance * ego_shield_op_sword_distance, dim=-1)
    r_shield_to_sword = torch.exp(-pos_err_scale3 * ego_shield_op_sword_err)
    ##*****************r face******************##
    # Reward facing the opponent: dot product of the heading direction with
    # the unit vector toward the opponent, clipped at zero.
    tar_dir = root_pos_xy_op - root_xy_pos
    tar_dir = torch.nn.functional.normalize(tar_dir, dim=-1)
    heading_rot = torch_utils.calc_heading_quat(root_rot)
    facing_dir = torch.zeros_like(root_pos)
    facing_dir[..., 0] = 1.0
    facing_dir = quat_rotate(heading_rot, facing_dir)
    facing_err = torch.sum(tar_dir * facing_dir[..., 0:2], dim=-1)
    facing_reward = torch.clamp_min(facing_err, 0.0)
    ##*****************r op fall******************##
    # Opponent fall detection: a non-contact body experiences contact force
    # while some non-contact body is below its termination height.
    masked_contact_buf_op = contact_buf_op.clone()
    masked_contact_buf_op[:, contact_body_ids, :] = 0
    fall_contact_op = torch.any(torch.abs(masked_contact_buf_op) > 0.1, dim=-1)
    fall_contact_op = torch.any(fall_contact_op, dim=-1)
    body_height_op = body_pos_op[..., 2]
    fall_height_op = body_height_op < termination_heights
    fall_height_op[:, contact_body_ids] = False
    fall_height_op = torch.any(fall_height_op, dim=-1)
    has_fallen_op = torch.logical_and(fall_contact_op, fall_height_op)
    # Reward tilting the opponent away from upright (full reward once fallen).
    op_up = quat_rotate(root_rot_op, up)
    op_rot_err = torch.sum(up * op_up, dim=-1)
    op_rot_r = 0.6 * torch.clamp_min(1.0 - op_rot_err, 0.0)  # -> [0, 1]
    succ = op_rot_err < 0.2
    op_rot_r = torch.where(has_fallen_op,
                           torch.ones_like(op_rot_r), op_rot_r)
    # test, when op fall, then r_close = 0 to encourage to agents separate.
    r_separate = torch.norm((root_pos_xy_op - root_pos_xy), dim=-1)
    r_separate = torch.where(r_separate > 0.1, r_separate, torch.zeros_like(r_separate))
    r_close = torch.where(has_fallen_op, r_separate, r_close)
    r_shield_to_sword = torch.where(has_fallen_op, torch.zeros_like(r_shield_to_sword), r_shield_to_sword)
    ##*****************r penalty******************##
    # Penalize leaving the square arena (negative border distance on either axis).
    relative_x_1 = borderline - root_xy_pos[:, 0]
    relative_x_2 = root_xy_pos[:, 0] + borderline
    relative_x = torch.minimum(relative_x_1, relative_x_2)
    relative_x = relative_x < 0
    relative_y_1 = borderline - root_xy_pos[:, 1]
    relative_y_2 = root_xy_pos[:, 1] + borderline
    relative_y = torch.minimum(relative_y_1, relative_y_2)
    relative_y = relative_y < 0
    is_out = relative_x | relative_y
    r_penalty = is_out * 1.0
    # Also penalize the ego agent falling (same detection as for the opponent).
    masked_contact_buf = contact_force.clone()
    masked_contact_buf[:, contact_body_ids, :] = 0
    fall_contact = torch.any(torch.abs(masked_contact_buf) > 0.1, dim=-1)
    fall_contact = torch.any(fall_contact, dim=-1)
    body_height = body_pos[..., 2]
    fall_height = body_height < termination_heights
    fall_height[:, contact_body_ids] = False
    fall_height = torch.any(fall_height, dim=-1)
    has_fallen_ego = torch.logical_and(fall_contact, fall_height)
    r_penalty += has_fallen_ego * 1.0
    ##*****************r penalty******************##
    # Weighted sum of all reward terms.
    reward = -r_penalty * ego_fall_out_reward_w + op_rot_r * op_fall_reward_w + \
             r_shield_to_sword * shield_to_sword_pos_reward_w + r_close * sword_to_op_reward_w + \
             r_damage * damage_reward_w + r_energy * reward_energy_w + facing_reward * reward_face_w + \
             r_strike_vel_acc * reward_strike_vel_acc_w + r_foot_to_op * reward_foot_to_op_w + \
             r_kick * reward_kick_w
    return reward, force_ego_to_op, force_op_to_ego


@torch.jit.script
def compute_humanoid_reset(reset_buf, progress_buf, ego_to_op_damage, op_to_ego_damage, contact_buf,
                           contact_buf_op, contact_body_ids, rigid_body_pos, rigid_body_pos_op,
                           max_episode_length, enable_early_termination, termination_heights, borderline):
    # type: (Tensor, Tensor, Tensor, Tensor,Tensor, Tensor, Tensor, Tensor, Tensor, float, bool, Tensor, float) -> Tuple[Tensor, Tensor,Tensor,Tensor,Tensor]
    # Decide per-env episode termination and, at reset time, the win/lose/draw
    # outcome from the accumulated damage totals.
    terminated = torch.zeros_like(reset_buf)
    if (enable_early_termination):
        masked_contact_buf = contact_buf.clone()
        masked_contact_buf_op = contact_buf_op.clone()
        masked_contact_buf[:, contact_body_ids, :] = 0
        masked_contact_buf_op[:, contact_body_ids, :] = 0
        # Fall detection for both agents (contact on a non-contact body plus
        # a body below its termination height).
        fall_contact = torch.any(torch.abs(masked_contact_buf) > 0.1, dim=-1)
        fall_contact = torch.any(fall_contact, dim=-1)
        fall_contact_op = torch.any(torch.abs(masked_contact_buf_op) > 0.1, dim=-1)
        fall_contact_op = torch.any(fall_contact_op, dim=-1)

        body_height = rigid_body_pos[..., 2]
        body_height_op = rigid_body_pos_op[..., 2]
        fall_height = body_height < termination_heights
        fall_height_op = body_height_op < termination_heights
        fall_height[:, contact_body_ids] = False
        fall_height_op[:, contact_body_ids] = False
        fall_height = torch.any(fall_height, dim=-1)
        fall_height_op = torch.any(fall_height_op, dim=-1)

        ## out area
        # Either agent's root leaving the square arena of half-width
        # `borderline` ends the episode.
        root_pos = rigid_body_pos[:, 0, 0:2]
        root_pos_op = rigid_body_pos_op[:, 0, 0:2]
        relative_x_1 = borderline - root_pos[:, 0]
        relative_x_2 = root_pos[:, 0] + borderline
        relative_x = torch.minimum(relative_x_1, relative_x_2)
        relative_x = relative_x < 0
        relative_y_1 = borderline - root_pos[:, 1]
        relative_y_2 = root_pos[:, 1] + borderline
        relative_y = torch.minimum(relative_y_1, relative_y_2)
        relative_y = relative_y < 0
        is_out_ego = relative_x | relative_y

        relative_x_1_op = borderline - root_pos_op[:, 0]
        relative_x_2_op = root_pos_op[:, 0] + borderline
        relative_x_op = torch.minimum(relative_x_1_op, relative_x_2_op)
        relative_x_op = relative_x_op < 0
        relative_y_1_op = borderline - root_pos_op[:, 1]
        relative_y_2_op = root_pos_op[:, 1] + borderline
        relative_y_op = torch.minimum(relative_y_1_op, relative_y_2_op)
        relative_y_op = relative_y_op < 0
        is_out_op = relative_x_op | relative_y_op
        is_out = is_out_ego | is_out_op
        has_failed = is_out
        # first timestep can sometimes still have nonzero contact forces
        # so only check after first couple of steps
        has_failed *= (progress_buf > 1)
        terminated = torch.where(has_failed, torch.ones_like(reset_buf), terminated)
    # NOTE(review): the fall_contact/fall_height flags computed above are not
    # folded into has_failed — only leaving the arena triggers early
    # termination here.
    damage_ego_more_than_op = ego_to_op_damage > op_to_ego_damage
    damage_op_more_than_ego = op_to_ego_damage > ego_to_op_damage
    # Reset at early termination or at the episode-length cap; outcome flags
    # are only raised on resetting envs.
    reset = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(reset_buf), terminated)
    win = torch.where(reset, damage_ego_more_than_op, torch.zeros_like(reset_buf, dtype=torch.bool))
    lose = torch.where(reset, damage_op_more_than_ego, torch.zeros_like(reset_buf, dtype=torch.bool))
    draw = torch.where(reset, ego_to_op_damage == op_to_ego_damage,
                       torch.zeros_like(reset_buf, dtype=torch.bool))
    return reset, terminated, win, lose, draw


@torch.jit.script
def expand_env_ids(env_ids, n_agents):
    # type: (Tensor, int) -> Tensor
    # Map per-env indices to per-agent actor indices: env i -> i*n_agents + k.
    device = env_ids.device
    agent_env_ids = torch.zeros((n_agents * len(env_ids)), device=device, dtype=torch.long)
    for idx in range(n_agents):
        agent_env_ids[idx::n_agents] = env_ids * n_agents + idx
    return agent_env_ids


================================================
FILE: timechamber/train.py
================================================
# train.py
# Script to train policies in Isaac Gym
#
# Copyright (c) 2018-2022, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3.
# Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import datetime
from statistics import mode

# isaacgym must be imported before torch-dependent modules.
import isaacgym
import os
import hydra
import yaml

from omegaconf import DictConfig, OmegaConf
from hydra.utils import to_absolute_path
import gym

from timechamber.utils.reformat import omegaconf_to_dict, print_dict
from timechamber.utils.utils import set_np_formatting, set_seed
from timechamber.utils.rlgames_utils import RLGPUEnv, RLGPUAlgoObserver, get_rlgames_env_creator
from rl_games.common import env_configurations, vecenv
from rl_games.torch_runner import Runner
from rl_games.algos_torch import model_builder
from timechamber.ase import ase_agent
from timechamber.ase import ase_models
from timechamber.ase import ase_network_builder
from timechamber.ase import hrl_models
from timechamber.ase import hrl_network_builder
from timechamber.learning import ppo_sp_agent
from timechamber.learning import hrl_sp_agent
from timechamber.learning import ppo_sp_player
from timechamber.learning import hrl_sp_player
from timechamber.learning import vectorized_models
from
@hydra.main(config_name="config", config_path="./cfg")
def launch_rlg_hydra(cfg: DictConfig):
    """Hydra entry point: builds the Isaac Gym env, registers TimeChamber's
    self-play/ASE/HRL agents and networks with rl_games, then trains or plays.

    Reads everything from the hydra config (see ./cfg/config.yaml); returns None.
    """
    time_str = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    run_name = f"{cfg.wandb_name}_{time_str}"

    # ensure checkpoints can be specified as relative paths
    # (hydra changes the CWD, so relative paths must be resolved up front)
    if cfg.checkpoint:
        cfg.checkpoint = to_absolute_path(cfg.checkpoint)

    cfg_dict = omegaconf_to_dict(cfg)
    print_dict(cfg_dict)

    # set numpy formatting for printing only
    set_np_formatting()

    rank = int(os.getenv("LOCAL_RANK", "0"))
    if cfg.multi_gpu:
        # torchrun --standalone --nnodes=1 --nproc_per_node=2 train.py
        # each torchrun worker pins both sim and RL to its own GPU
        cfg.sim_device = f'cuda:{rank}'
        cfg.rl_device = f'cuda:{rank}'

    # sets seed. if seed is -1 will pick a random one
    # rank offset keeps per-worker seeds distinct under multi-GPU
    cfg.seed += rank
    cfg.seed = set_seed(cfg.seed, torch_deterministic=cfg.torch_deterministic, rank=rank)

    if cfg.wandb_activate and rank == 0:
        # Make sure to install WandB if you actually use this.
        import wandb

        run = wandb.init(
            project=cfg.wandb_project,
            group=cfg.wandb_group,
            entity=cfg.wandb_entity,
            config=cfg_dict,
            sync_tensorboard=True,
            name=run_name,
            resume="allow",
        )

    def create_env_thunk(**kwargs):
        # Deferred env construction; called by rl_games through the
        # 'rlgpu' env_configurations entry registered below.
        envs = timechamber.make(
            cfg.seed,
            cfg.task_name,
            cfg.task.env.numEnvs,
            cfg.sim_device,
            cfg.rl_device,
            cfg.graphics_device_id,
            cfg.device_type,
            cfg.headless,
            cfg.multi_gpu,
            cfg.capture_video,
            cfg.force_render,
            cfg,
            **kwargs,
        )
        if cfg.capture_video:
            envs.is_vector_env = True
            envs = gym.wrappers.RecordVideo(
                envs,
                f"videos/{run_name}",
                step_trigger=lambda step: step % cfg.capture_video_freq == 0,
                video_length=cfg.capture_video_len,
            )
        return envs

    # register the rl-games adapter to use inside the runner
    vecenv.register('RLGPU',
                    lambda config_name, num_actors, **kwargs: RLGPUEnv(config_name, num_actors, **kwargs))
    env_configurations.register('rlgpu', {
        'vecenv_type': 'RLGPU',
        'env_creator': create_env_thunk,
    })

    # register new AMP network builder and agent
    def build_runner(algo_observer):
        # Wires TimeChamber's agents/players (algo+player factories) and
        # model/network builders (global model_builder registry) into rl_games.
        runner = Runner(algo_observer)
        runner.algo_factory.register_builder('self_play_continuous',
                                             lambda **kwargs: ppo_sp_agent.SPAgent(**kwargs))
        runner.algo_factory.register_builder('self_play_hrl',
                                             lambda **kwargs: hrl_sp_agent.HRLSPAgent(**kwargs))
        runner.algo_factory.register_builder('ase', lambda **kwargs: ase_agent.ASEAgent(**kwargs))
        runner.player_factory.register_builder('self_play_continuous',
                                               lambda **kwargs: ppo_sp_player.SPPlayer(**kwargs))
        runner.player_factory.register_builder('self_play_hrl',
                                               lambda **kwargs: hrl_sp_player.HRLSPPlayer(**kwargs))
        # runner.
        model_builder.register_model('hrl', lambda network, **kwargs: hrl_models.ModelHRLContinuous(network))
        model_builder.register_model('ase', lambda network, **kwargs: ase_models.ModelASEContinuous(network))
        model_builder.register_model('vectorized_a2c',
                                     lambda network, **kwargs: vectorized_models.ModelVectorizedA2C(network))
        model_builder.register_network('vectorized_a2c',
                                       lambda **kwargs: vectorized_network_builder.VectorizedA2CBuilder())
        model_builder.register_network('ase', lambda **kwargs: ase_network_builder.ASEBuilder())
        model_builder.register_network('hrl', lambda **kwargs: hrl_network_builder.HRLBuilder())
        return runner

    rlg_config_dict = omegaconf_to_dict(cfg.train)  # convert CLI arguments into dictionory

    # create runner and set the settings
    runner = build_runner(RLGPUAlgoObserver())
    runner.load(rlg_config_dict)
    runner.reset()

    # dump config dict
    experiment_dir = os.path.join('runs', cfg.train.params.config.name)
    os.makedirs(experiment_dir, exist_ok=True)
    with open(os.path.join(experiment_dir, 'config.yaml'), 'w') as f:
        f.write(OmegaConf.to_yaml(cfg))

    if cfg.multi_gpu:
        # NOTE(review): rank is re-derived via horovod here while the earlier
        # rank came from LOCAL_RANK (torchrun) — confirm both paths agree.
        import horovod.torch as hvd

        rank = hvd.rank()
    else:
        rank = 0

    if cfg.wandb_activate and rank == 0:
        # Make sure to install WandB if you actually use this.
        # NOTE(review): wandb.init is called a second time here (first call above
        # used name=run_name, this one uses id=run_name) — confirm intended.
        import wandb

        wandb.init(
            project=cfg.wandb_project,
            group=cfg.wandb_group,
            entity=cfg.wandb_entity,
            config=cfg_dict,
            sync_tensorboard=True,
            id=run_name,
            resume="allow",
            monitor_gym=True,
        )

    runner.run({
        'train': not cfg.test,
        'play': cfg.test,
        'checkpoint': cfg.checkpoint,
        'sigma': None
    })

    if cfg.wandb_activate and rank == 0:
        wandb.finish()


if __name__ == "__main__":
    launch_rlg_hydra()
import wandb wandb.init( project=cfg.wandb_project, group=cfg.wandb_group, entity=cfg.wandb_entity, config=cfg_dict, sync_tensorboard=True, id=run_name, resume="allow", monitor_gym=True, ) runner.run({ 'train': not cfg.test, 'play': cfg.test, 'checkpoint': cfg.checkpoint, 'sigma': None }) if cfg.wandb_activate and rank == 0: wandb.finish() if __name__ == "__main__": launch_rlg_hydra() ================================================ FILE: timechamber/utils/config.py ================================================ import os import sys import yaml from isaacgym import gymapi from isaacgym import gymutil import numpy as np import random import torch SIM_TIMESTEP = 1.0 / 60.0 def parse_sim_params(args, cfg): # initialize sim sim_params = gymapi.SimParams() sim_params.dt = SIM_TIMESTEP sim_params.num_client_threads = args.num_subscenes if args.physics_engine == "flex": if args.device_type != "cpu": print("WARNING: Using Flex with GPU instead of PHYSX!") sim_params.flex.shape_collision_margin = 0.01 sim_params.flex.num_outer_iterations = 4 sim_params.flex.num_inner_iterations = 10 elif args.physics_engine == "physx": sim_params.physx.solver_type = 1 sim_params.physx.num_position_iterations = 4 sim_params.physx.num_velocity_iterations = 0 sim_params.physx.num_threads = 4 sim_params.physx.use_gpu = args.use_gpu sim_params.physx.num_subscenes = args.num_subscenes sim_params.physx.max_gpu_contact_pairs = 8 * 1024 * 1024 sim_params.use_gpu_pipeline = args.use_gpu_pipeline sim_params.physx.use_gpu = args.use_gpu # if sim options are provided in cfg, parse them and update/override above: if "sim" in cfg: gymutil.parse_sim_config(cfg["sim"], sim_params) # Override num_threads if passed on the command line if args.physics_engine == "physx" and args.num_threads > 0: sim_params.physx.num_threads = args.num_threads return sim_params ================================================ FILE: timechamber/utils/gym_util.py ================================================ # Copyright (c) 
2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from timechamber.utils import logger from isaacgym import gymapi import numpy as np import torch from isaacgym.torch_utils import * from isaacgym import gymtorch def setup_gym_viewer(config): gym = initialize_gym(config) sim, viewer = configure_gym(gym, config) return gym, sim, viewer def initialize_gym(config): gym = gymapi.acquire_gym() if not gym.initialize(): logger.warn("*** Failed to initialize gym") quit() return gym def configure_gym(gym, config): engine, render = config['engine'], config['render'] # physics engine settings if(engine == 'FLEX'): sim_engine = gymapi.SIM_FLEX elif(engine == 'PHYSX'): sim_engine = gymapi.SIM_PHYSX else: logger.warn("Uknown physics engine. defaulting to FLEX") sim_engine = gymapi.SIM_FLEX # gym viewer if render: # create viewer sim = gym.create_sim(0, 0, sim_type=sim_engine) viewer = gym.create_viewer( sim, int(gymapi.DEFAULT_VIEWER_WIDTH / 1.25), int(gymapi.DEFAULT_VIEWER_HEIGHT / 1.25) ) if viewer is None: logger.warn("*** Failed to create viewer") quit() # enable left mouse click or space bar for throwing projectiles if config['add_projectiles']: gym.subscribe_viewer_mouse_event(viewer, gymapi.MOUSE_LEFT_BUTTON, "shoot") gym.subscribe_viewer_keyboard_event(viewer, gymapi.KEY_SPACE, "shoot") else: sim = gym.create_sim(0, -1) viewer = None # simulation params scene_config = config['env']['scene'] sim_params = gymapi.SimParams() sim_params.solver_type = scene_config['SolverType'] sim_params.num_outer_iterations = scene_config['NumIterations'] sim_params.num_inner_iterations = scene_config['NumInnerIterations'] sim_params.relaxation = scene_config.get('Relaxation', 0.75) sim_params.warm_start = scene_config.get('WarmStart', 0.25) sim_params.geometric_stiffness = scene_config.get('GeometricStiffness', 1.0) sim_params.shape_collision_margin = 0.01 sim_params.gravity = gymapi.Vec3(0.0, -9.8, 0.0) gym.set_sim_params(sim, sim_params) return sim, viewer def parse_states_from_reference_states(reference_states, progress): # parse 
reference states from DeepMimicState global_quats_ref = torch.tensor( reference_states._global_rotation[(progress,)].numpy(), dtype=torch.double ).cuda() ts_ref = torch.tensor( reference_states._translation[(progress,)].numpy(), dtype=torch.double ).cuda() vels_ref = torch.tensor( reference_states._velocity[(progress,)].numpy(), dtype=torch.double ).cuda() avels_ref = torch.tensor( reference_states._angular_velocity[(progress,)].numpy(), dtype=torch.double ).cuda() return global_quats_ref, ts_ref, vels_ref, avels_ref def parse_states_from_reference_states_with_motion_id(precomputed_state, progress, motion_id): assert len(progress) == len(motion_id) # get the global id global_id = precomputed_state['motion_offset'][motion_id] + progress global_id = np.minimum(global_id, precomputed_state['global_quats_ref'].shape[0] - 1) # parse reference states from DeepMimicState global_quats_ref = precomputed_state['global_quats_ref'][global_id] ts_ref = precomputed_state['ts_ref'][global_id] vels_ref = precomputed_state['vels_ref'][global_id] avels_ref = precomputed_state['avels_ref'][global_id] return global_quats_ref, ts_ref, vels_ref, avels_ref def parse_dof_state_with_motion_id(precomputed_state, dof_state, progress, motion_id): assert len(progress) == len(motion_id) # get the global id global_id = precomputed_state['motion_offset'][motion_id] + progress # NOTE: it should never reach the dof_state.shape, cause the episode is # terminated 2 steps before global_id = np.minimum(global_id, dof_state.shape[0] - 1) # parse reference states from DeepMimicState return dof_state[global_id] def get_flatten_ids(precomputed_state): motion_offsets = precomputed_state['motion_offset'] init_state_id, init_motion_id, global_id = [], [], [] for i_motion in range(len(motion_offsets) - 1): i_length = motion_offsets[i_motion + 1] - motion_offsets[i_motion] init_state_id.extend(range(i_length)) init_motion_id.extend([i_motion] * i_length) if len(global_id) == 0: global_id.extend(range(0, 
i_length)) else: global_id.extend(range(global_id[-1] + 1, global_id[-1] + i_length + 1)) return np.array(init_state_id), np.array(init_motion_id), \ np.array(global_id) def parse_states_from_reference_states_with_global_id(precomputed_state, global_id): # get the global id global_id = global_id % precomputed_state['global_quats_ref'].shape[0] # parse reference states from DeepMimicState global_quats_ref = precomputed_state['global_quats_ref'][global_id] ts_ref = precomputed_state['ts_ref'][global_id] vels_ref = precomputed_state['vels_ref'][global_id] avels_ref = precomputed_state['avels_ref'][global_id] return global_quats_ref, ts_ref, vels_ref, avels_ref def get_robot_states_from_torch_tensor(config, ts, global_quats, vels, avels, init_rot, progress, motion_length=-1, actions=None, relative_rot=None, motion_id=None, num_motion=None, motion_onehot_matrix=None): info = {} # the observation with quaternion-based representation torso_height = ts[..., 0, 1].cpu().numpy() gttrny, gqny, vny, avny, info['root_yaw_inv'] = \ quaternion_math.compute_observation_return_info(global_quats, ts, vels, avels) joint_obs = np.concatenate([gttrny.cpu().numpy(), gqny.cpu().numpy(), vny.cpu().numpy(), avny.cpu().numpy()], axis=-1) joint_obs = joint_obs.reshape(joint_obs.shape[0], -1) num_envs = joint_obs.shape[0] obs = np.concatenate([torso_height[:, np.newaxis], joint_obs], -1) # the previous action if config['env_action_ob']: obs = np.concatenate([obs, actions], axis=-1) # the orientation if config['env_orientation_ob']: if relative_rot is not None: obs = np.concatenate([obs, relative_rot], axis=-1) else: curr_rot = global_quats[np.arange(num_envs)][:, 0] curr_rot = curr_rot.reshape(num_envs, -1, 4) relative_rot = quaternion_math.compute_orientation_drift( init_rot, curr_rot ).cpu().numpy() obs = np.concatenate([obs, relative_rot], axis=-1) if config['env_frame_ob']: if type(motion_length) == np.ndarray: motion_length = motion_length.astype(np.float) progress_ob = 
np.expand_dims(progress.astype(np.float) / motion_length, axis=-1) else: progress_ob = np.expand_dims(progress.astype(np.float) / float(motion_length), axis=-1) obs = np.concatenate([obs, progress_ob], axis=-1) if config['env_motion_ob'] and not config['env_motion_ob_onehot']: motion_id_ob = np.expand_dims(motion_id.astype(np.float) / float(num_motion), axis=-1) obs = np.concatenate([obs, motion_id_ob], axis=-1) elif config['env_motion_ob'] and config['env_motion_ob_onehot']: motion_id_ob = motion_onehot_matrix[motion_id] obs = np.concatenate([obs, motion_id_ob], axis=-1) return obs, info def get_xyzoffset(start_ts, end_ts, root_yaw_inv): xyoffset = (end_ts - start_ts)[:, [0], :].reshape(1, -1, 1, 3) ryinv = root_yaw_inv.reshape(1, -1, 1, 4) calibrated_xyz_offset = quaternion_math.quat_apply(ryinv, xyoffset)[0, :, 0, :] return calibrated_xyz_offset ================================================ FILE: timechamber/utils/logger.py ================================================ # ----------------------------------------------------------------------------- # @brief: # The logger here will be called all across the project. 
class _MyFormatter(logging.Formatter):
    '''
    @brief:
        a class to make sure the format could be used
    '''

    def format(self, record):
        # Colored "[time @file:line]" prefix; WRN/ERR tags for higher levels.
        date = colored('[%(asctime)s @%(filename)s:%(lineno)d]', 'green')
        msg = '%(message)s'

        if record.levelno == logging.WARNING:
            fmt = date + ' ' + \
                colored('WRN', 'red', attrs=[]) + ' ' + msg
        elif record.levelno == logging.ERROR or \
                record.levelno == logging.CRITICAL:
            fmt = date + ' ' + \
                colored('ERR', 'red', attrs=['underline']) + ' ' + msg
        else:
            fmt = date + ' ' + msg

        if hasattr(self, '_style'):
            # Python3 compatibility: Formatter stores the fmt on a style object
            self._style._fmt = fmt
        self._fmt = fmt
        return super(self.__class__, self).format(record)


# the actual worker is the '_logger'; propagate=False keeps messages from
# reaching the root logger twice.
_logger = logging.getLogger('joint_embedding')
_logger.propagate = False
_logger.setLevel(logging.INFO)

# set the console output handler
con_handler = logging.StreamHandler(sys.stdout)
con_handler.setFormatter(_MyFormatter(datefmt='%m%d %H:%M:%S'))
_logger.addHandler(con_handler)


class GLOBAL_PATH(object):
    # Mutable holder for the current log path, shared module-wide via PATH.

    def __init__(self, path=None):
        if path is None:
            path = os.getcwd()
        self.path = path

    def _set_path(self, path):
        self.path = path

    def _get_path(self):
        return self.path


PATH = GLOBAL_PATH()


def set_file_handler(path=None, prefix='', time_str=''):
    """Attach a file handler (and tensorboard_logger) under a per-run log dir.

    The log directory name is derived from prefix+time; if `path` is None it
    is placed two levels above the __main__ module, else under `path`/log.
    Returns the path that was configured.
    """
    # set the file output handler
    if time_str == '':
        file_name = prefix + \
            datetime.datetime.now().strftime("%A_%d_%B_%Y_%I:%M%p") + '.log'
    else:
        file_name = prefix + time_str + '.log'

    if path is None:
        mod = sys.modules['__main__']
        path = os.path.join(os.path.abspath(mod.__file__), '..', '..', 'log')
    else:
        path = os.path.join(path, 'log')
    path = os.path.abspath(path)

    # NOTE(review): the '.log'-suffixed name is used as a *directory* here
    # (created by makedirs, then file_name is joined onto it again below) —
    # looks intentional (one directory per run) but confirm.
    path = os.path.join(path, file_name)
    if not os.path.exists(path):
        os.makedirs(path)
    PATH._set_path(path)
    path = os.path.join(path, file_name)

    # tensorboard_logger writes its event files next to the text log
    from tensorboard_logger import configure
    configure(path)

    file_handler = logging.FileHandler(
        filename=os.path.join(path, 'logger'), encoding='utf-8', mode='w')
    file_handler.setFormatter(_MyFormatter(datefmt='%m%d %H:%M:%S'))
    _logger.addHandler(file_handler)

    _logger.info('Log file set to {}'.format(path))
    return path


def _get_path():
    # Current log path as recorded by the last set_file_handler call.
    return PATH._get_path()


_LOGGING_METHOD = ['info', 'warning', 'error', 'critical',
                   'warn', 'exception', 'debug']

# export logger functions: makes e.g. logger.info(...) resolve to
# _logger.info at module level.
for func in _LOGGING_METHOD:
    locals()[func] = getattr(_logger, func)
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np import os import yaml from timechamber.tasks.ase_humanoid_base.poselib.poselib.skeleton.skeleton3d import SkeletonMotion from timechamber.tasks.ase_humanoid_base.poselib.poselib.core.rotation3d import * from isaacgym.torch_utils import * from utils import torch_utils import torch USE_CACHE = True print("MOVING MOTION DATA TO GPU, USING CACHE:", USE_CACHE) if not USE_CACHE: old_numpy = torch.Tensor.numpy class Patch: def numpy(self): if self.is_cuda: return self.to("cpu").numpy() else: return old_numpy(self) torch.Tensor.numpy = Patch.numpy class DeviceCache: def __init__(self, obj, device): self.obj = obj self.device = device keys = dir(obj) num_added = 0 for k in keys: try: out = getattr(obj, k) except: print("Error for key=", k) continue if isinstance(out, torch.Tensor): if out.is_floating_point(): out = out.to(self.device, dtype=torch.float32) else: out.to(self.device) setattr(self, k, out) num_added += 1 elif isinstance(out, np.ndarray): out = torch.tensor(out) if out.is_floating_point(): out = out.to(self.device, dtype=torch.float32) else: out.to(self.device) setattr(self, k, out) num_added += 1 print("Total added", num_added) def __getattr__(self, string): out = getattr(self.obj, string) return out class MotionLib(): def __init__(self, motion_file, dof_body_ids, dof_offsets, key_body_ids, device): self._dof_body_ids = dof_body_ids self._dof_offsets = dof_offsets self._num_dof = dof_offsets[-1] self._key_body_ids = 
class MotionLib():
    """Loads a set of reference motions and serves blended motion states.

    All motions' frames are concatenated along dim 0 into flat tensors
    (gts/grs/lrs/...); `length_starts[m]` is motion m's first flat index.
    """

    def __init__(self, motion_file, dof_body_ids, dof_offsets, key_body_ids, device):
        # dof_offsets[j]..dof_offsets[j+1] is body j's slice of the DOF vector;
        # the last offset is therefore the total DOF count.
        self._dof_body_ids = dof_body_ids
        self._dof_offsets = dof_offsets
        self._num_dof = dof_offsets[-1]
        self._key_body_ids = torch.tensor(key_body_ids, device=device)
        self._device = device
        self._load_motions(motion_file)

        motions = self._motions
        # Flat per-frame tensors over all motions (float32):
        self.gts = torch.cat([m.global_translation for m in motions], dim=0).float()
        self.grs = torch.cat([m.global_rotation for m in motions], dim=0).float()
        self.lrs = torch.cat([m.local_rotation for m in motions], dim=0).float()
        self.grvs = torch.cat([m.global_root_velocity for m in motions], dim=0).float()
        self.gravs = torch.cat([m.global_root_angular_velocity for m in motions], dim=0).float()
        self.dvs = torch.cat([m.dof_vels for m in motions], dim=0).float()

        # exclusive prefix sum of frame counts -> start index of each motion
        lengths = self._motion_num_frames
        lengths_shifted = lengths.roll(1)
        lengths_shifted[0] = 0
        self.length_starts = lengths_shifted.cumsum(0)

        self.motion_ids = torch.arange(len(self._motions), dtype=torch.long, device=self._device)

        return

    def num_motions(self):
        return len(self._motions)

    def get_total_length(self):
        # total duration in seconds across all motions
        return sum(self._motion_lengths)

    def get_motion(self, motion_id):
        return self._motions[motion_id]

    def sample_motions(self, n):
        # weighted sampling with replacement, weights from the motion config
        motion_ids = torch.multinomial(self._motion_weights, num_samples=n, replacement=True)

        # m = self.num_motions()
        # motion_ids = np.random.choice(m, size=n, replace=True, p=self._motion_weights)
        # motion_ids = torch.tensor(motion_ids, device=self._device, dtype=torch.long)
        return motion_ids

    def sample_time(self, motion_ids, truncate_time=None):
        """Sample a uniform-random time within each motion's duration.

        truncate_time shaves that many seconds off the end (e.g. to leave
        room for a lookahead window).
        """
        n = len(motion_ids)
        phase = torch.rand(motion_ids.shape, device=self._device)

        motion_len = self._motion_lengths[motion_ids]
        if (truncate_time is not None):
            assert (truncate_time >= 0.0)
            motion_len -= truncate_time

        motion_time = phase * motion_len
        return motion_time

    def get_motion_length(self, motion_ids):
        return self._motion_lengths[motion_ids]

    def get_motion_state(self, motion_ids, motion_times):
        """Return the interpolated motion state at the requested times.

        Each time is mapped to two neighboring frames and a blend factor;
        positions are lerped, rotations slerped.  Velocities are taken from
        the earlier frame only (not blended).
        Returns (root_pos, root_rot, dof_pos, root_vel, root_ang_vel,
        dof_vel, key_pos).
        """
        n = len(motion_ids)
        num_bodies = self._get_num_bodies()
        num_key_bodies = self._key_body_ids.shape[0]

        motion_len = self._motion_lengths[motion_ids]
        num_frames = self._motion_num_frames[motion_ids]
        dt = self._motion_dt[motion_ids]

        frame_idx0, frame_idx1, blend = self._calc_frame_blend(motion_times, motion_len, num_frames, dt)

        # offset per-motion frame indices into the flat concatenated tensors
        f0l = frame_idx0 + self.length_starts[motion_ids]
        f1l = frame_idx1 + self.length_starts[motion_ids]

        root_pos0 = self.gts[f0l, 0]
        root_pos1 = self.gts[f1l, 0]

        root_rot0 = self.grs[f0l, 0]
        root_rot1 = self.grs[f1l, 0]

        local_rot0 = self.lrs[f0l]
        local_rot1 = self.lrs[f1l]

        root_vel = self.grvs[f0l]

        root_ang_vel = self.gravs[f0l]

        key_pos0 = self.gts[f0l.unsqueeze(-1), self._key_body_ids.unsqueeze(0)]
        key_pos1 = self.gts[f1l.unsqueeze(-1), self._key_body_ids.unsqueeze(0)]

        dof_vel = self.dvs[f0l]

        # everything must already be float32 (cast on load)
        vals = [root_pos0, root_pos1, local_rot0, local_rot1, root_vel, root_ang_vel, key_pos0, key_pos1]
        for v in vals:
            assert v.dtype != torch.float64

        blend = blend.unsqueeze(-1)

        root_pos = (1.0 - blend) * root_pos0 + blend * root_pos1

        root_rot = torch_utils.slerp(root_rot0, root_rot1, blend)

        blend_exp = blend.unsqueeze(-1)
        key_pos = (1.0 - blend_exp) * key_pos0 + blend_exp * key_pos1

        local_rot = torch_utils.slerp(local_rot0, local_rot1, torch.unsqueeze(blend, axis=-1))
        dof_pos = self._local_rotation_to_dof(local_rot)

        return root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos

    def _load_motions(self, motion_file):
        """Load all motions listed by `motion_file`, precompute DOF velocities,
        and move everything onto self._device (via DeviceCache when enabled)."""
        self._motions = []
        self._motion_lengths = []
        self._motion_weights = []
        self._motion_fps = []
        self._motion_dt = []
        self._motion_num_frames = []
        self._motion_files = []

        total_len = 0.0

        motion_files, motion_weights = self._fetch_motion_files(motion_file)
        num_motion_files = len(motion_files)
        for f in range(num_motion_files):
            curr_file = motion_files[f]
            print("Loading {:d}/{:d} motion files: {:s}".format(f + 1, num_motion_files, curr_file))
            curr_motion = SkeletonMotion.from_file(curr_file)

            motion_fps = curr_motion.fps
            curr_dt = 1.0 / motion_fps

            num_frames = curr_motion.tensor.shape[0]
            # duration spans (num_frames - 1) intervals, not num_frames
            curr_len = 1.0 / motion_fps * (num_frames - 1)

            self._motion_fps.append(motion_fps)
            self._motion_dt.append(curr_dt)
            self._motion_num_frames.append(num_frames)

            curr_dof_vels = self._compute_motion_dof_vels(curr_motion)
            curr_motion.dof_vels = curr_dof_vels

            # Moving motion tensors to the GPU
            if USE_CACHE:
                curr_motion = DeviceCache(curr_motion, self._device)
            else:
                curr_motion.tensor = curr_motion.tensor.to(self._device)
                curr_motion._skeleton_tree._parent_indices = curr_motion._skeleton_tree._parent_indices.to(self._device)
                curr_motion._skeleton_tree._local_translation = curr_motion._skeleton_tree._local_translation.to(self._device)
                curr_motion._rotation = curr_motion._rotation.to(self._device)

            self._motions.append(curr_motion)
            self._motion_lengths.append(curr_len)

            curr_weight = motion_weights[f]
            self._motion_weights.append(curr_weight)
            self._motion_files.append(curr_file)

        self._motion_lengths = torch.tensor(self._motion_lengths, device=self._device, dtype=torch.float32)

        # normalize weights into a sampling distribution
        self._motion_weights = torch.tensor(self._motion_weights, dtype=torch.float32, device=self._device)
        self._motion_weights /= self._motion_weights.sum()

        self._motion_fps = torch.tensor(self._motion_fps, device=self._device, dtype=torch.float32)
        self._motion_dt = torch.tensor(self._motion_dt, device=self._device, dtype=torch.float32)
        self._motion_num_frames = torch.tensor(self._motion_num_frames, device=self._device)

        num_motions = self.num_motions()
        total_len = self.get_total_length()

        print("Loaded {:d} motions with a total length of {:.3f}s.".format(num_motions, total_len))

        return

    def _fetch_motion_files(self, motion_file):
        """Resolve motion_file into ([file paths], [weights]).

        A .yaml file lists multiple motions (paths relative to the yaml);
        any other extension is treated as a single motion with weight 1.
        """
        ext = os.path.splitext(motion_file)[1]
        if (ext == ".yaml"):
            dir_name = os.path.dirname(motion_file)
            motion_files = []
            motion_weights = []

            with open(os.path.join(os.getcwd(), motion_file), 'r') as f:
                motion_config = yaml.load(f, Loader=yaml.SafeLoader)

            motion_list = motion_config['motions']
            for motion_entry in motion_list:
                curr_file = motion_entry['file']
                curr_weight = motion_entry['weight']
                assert (curr_weight >= 0)

                curr_file = os.path.join(dir_name, curr_file)
                motion_weights.append(curr_weight)
                motion_files.append(curr_file)
        else:
            motion_files = [motion_file]
            motion_weights = [1.0]

        return motion_files, motion_weights

    def _calc_frame_blend(self, time, len, num_frames, dt):
        # Map a time (seconds) to bracketing frame indices and a blend factor.
        # NOTE: the parameter `len` shadows the builtin here.
        phase = time / len
        phase = torch.clip(phase, 0.0, 1.0)

        frame_idx0 = (phase * (num_frames - 1)).long()
        frame_idx1 = torch.min(frame_idx0 + 1, num_frames - 1)
        blend = (time - frame_idx0 * dt) / dt

        return frame_idx0, frame_idx1, blend

    def _get_num_bodies(self):
        motion = self.get_motion(0)
        num_bodies = motion.num_joints
        return num_bodies

    def _compute_motion_dof_vels(self, motion):
        # Finite-difference DOF velocities per frame; the last frame reuses
        # the previous one so the count matches num_frames.
        num_frames = motion.tensor.shape[0]
        dt = 1.0 / motion.fps
        dof_vels = []

        for f in range(num_frames - 1):
            local_rot0 = motion.local_rotation[f]
            local_rot1 = motion.local_rotation[f + 1]
            frame_dof_vel = self._local_rotation_to_dof_vel(local_rot0, local_rot1, dt)
            frame_dof_vel = frame_dof_vel
            dof_vels.append(frame_dof_vel)

        dof_vels.append(dof_vels[-1])
        dof_vels = torch.stack(dof_vels, dim=0)

        return dof_vels

    def _local_rotation_to_dof(self, local_rot):
        """Convert per-body local rotations to the DOF position vector.

        3-DOF joints use an exponential-map encoding; 1-DOF joints are
        assumed to rotate about their local y axis.
        """
        body_ids = self._dof_body_ids
        dof_offsets = self._dof_offsets

        n = local_rot.shape[0]
        dof_pos = torch.zeros((n, self._num_dof), dtype=torch.float, device=self._device)

        for j in range(len(body_ids)):
            body_id = body_ids[j]
            joint_offset = dof_offsets[j]
            joint_size = dof_offsets[j + 1] - joint_offset

            if (joint_size == 3):
                joint_q = local_rot[:, body_id]
                joint_exp_map = torch_utils.quat_to_exp_map(joint_q)
                dof_pos[:, joint_offset:(joint_offset + joint_size)] = joint_exp_map
            elif (joint_size == 1):
                joint_q = local_rot[:, body_id]
                joint_theta, joint_axis = torch_utils.quat_to_angle_axis(joint_q)
                joint_theta = joint_theta * joint_axis[..., 1]  # assume joint is always along y axis

                joint_theta = normalize_angle(joint_theta)
                dof_pos[:, joint_offset] = joint_theta
            else:
                print("Unsupported joint type")
                assert (False)

        return dof_pos

    def _local_rotation_to_dof_vel(self, local_rot0, local_rot1, dt):
        """Angular DOF velocity between two consecutive local-rotation frames."""
        body_ids = self._dof_body_ids
        dof_offsets = self._dof_offsets

        dof_vel = torch.zeros([self._num_dof], device=self._device)

        # relative rotation frame0 -> frame1, as angle*axis / dt
        diff_quat_data = quat_mul_norm(quat_inverse(local_rot0), local_rot1)
        diff_angle, diff_axis = quat_angle_axis(diff_quat_data)
        local_vel = diff_axis * diff_angle.unsqueeze(-1) / dt
        local_vel = local_vel

        for j in range(len(body_ids)):
            body_id = body_ids[j]
            joint_offset = dof_offsets[j]
            joint_size = dof_offsets[j + 1] - joint_offset

            if (joint_size == 3):
                joint_vel = local_vel[body_id]
                dof_vel[joint_offset:(joint_offset + joint_size)] = joint_vel
            elif (joint_size == 1):
                assert (joint_size == 1)
                joint_vel = local_vel[body_id]
                dof_vel[joint_offset] = joint_vel[1]  # assume joint is always along y axis
            else:
                print("Unsupported joint type")
                assert (False)

        return dof_vel
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from omegaconf import DictConfig, OmegaConf from typing import Dict def omegaconf_to_dict(d: DictConfig)->Dict: """Converts an omegaconf DictConfig to a python Dict, respecting variable interpolation.""" ret = {} for k, v in d.items(): if isinstance(v, DictConfig): ret[k] = omegaconf_to_dict(v) else: ret[k] = v return ret def print_dict(val, nesting: int = -4, start: bool = True): """Outputs a nested dictionory.""" if type(val) == dict: if not start: print('') nesting += 4 for k in val: print(nesting * ' ', end='') print(k, end=': ') print_dict(val[k], nesting, start=False) else: print(val) # EOF ================================================ FILE: timechamber/utils/rlgames_utils.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. 
def get_rlgames_env_creator(
        # used to create the vec task
        seed: int,
        cfg: DictConfig,
        task_config: dict,
        task_name: str,
        sim_device: str,
        rl_device: str,
        graphics_device_id: int,
        headless: bool,
        device_type: str = "cuda",
        # Used to handle multi-gpu case
        multi_gpu: bool = False,
        post_create_hook: Callable = None,
        virtual_screen_capture: bool = False,
        force_render: bool = False,
):
    """Parses the configuration parameters for the environment task and creates a VecTask

    Args:
        task_config: environment configuration.
        task_name: Name of the task, used to evaluate based on the imported name (eg 'Trifinger')
        sim_device: The type of env device, eg 'cuda:0'
        rl_device: Device that RL will be done on, eg 'cuda:0'
        graphics_device_id: Graphics device ID.
        headless: Whether to run in headless mode.
        multi_gpu: Whether to use multi gpu
        post_create_hook: Hooks to be called after environment creation.
            [Needed to setup WandB only for one of the RL Games instances when doing multiple GPUs]
        virtual_screen_capture: Set to True to allow the users get captured screen in RGB array
            via `env.render(mode='rgb_array')`.
        force_render: Set to True to always force rendering in the steps
            (if the `control_freq_inv` is greater than 1 we suggest stting this arg to True)
    Returns:
        A VecTaskPython object.
    """

    def create_rlgpu_env():
        """
        Creates the task from configurations and wraps it using RL-games wrappers if required.
        """
        # create native task and pass custom config
        # MA_Humanoid_Strike uses the older AMP-style task constructor
        # (sim_params/physics_engine); all other tasks use the newer signature.
        if task_name == "MA_Humanoid_Strike":
            sim_params = parse_sim_params(cfg, task_config)
            # NOTE(review): if cfg.physics_engine is neither "physx" nor "flex",
            # `physics_engine` is never bound and the call below raises
            # NameError — confirm the config guarantees one of the two.
            if cfg.physics_engine == "physx":
                physics_engine = gymapi.SIM_PHYSX
            elif cfg.physics_engine == "flex":
                physics_engine = gymapi.SIM_FLEX
            task = isaacgym_task_map[task_name](
                cfg=task_config,
                sim_params=sim_params,
                physics_engine=physics_engine,
                device_type=device_type,
                device_id=graphics_device_id,
                headless=headless
            )
            # AMP=True enables the AMP-specific observation plumbing in the wrapper
            env = VecTaskPythonWrapper(task, rl_device,
                                       task_config.get("clip_observations", np.inf),
                                       task_config.get("clip_actions", 1.0),
                                       AMP=True)
        else:
            task = isaacgym_task_map[task_name](
                cfg=task_config,
                rl_device=rl_device,
                sim_device=sim_device,
                graphics_device_id=graphics_device_id,
                headless=headless,
                virtual_screen_capture=virtual_screen_capture,
                force_render=force_render,
            )
            env = VecTaskPythonWrapper(task, rl_device,
                                       task_config.get("clip_observations", np.inf),
                                       task_config.get("clip_actions", 1.0))

        if post_create_hook is not None:
            post_create_hook()

        return env

    return create_rlgpu_env
RLGPUAlgoObserver(AlgoObserver): """Allows us to log stats from the env along with the algorithm running stats. """ def __init__(self): pass def after_init(self, algo): self.algo = algo self.mean_scores = torch_ext.AverageMeter(1, self.algo.games_to_track).to(self.algo.ppo_device) self.ep_infos = [] self.direct_info = {} self.writer = self.algo.writer def process_infos(self, infos, done_indices): assert isinstance(infos, dict), "RLGPUAlgoObserver expects dict info" if isinstance(infos, dict): if 'episode' in infos: self.ep_infos.append(infos['episode']) if len(infos) > 0 and isinstance(infos, dict): # allow direct logging from env self.direct_info = {} for k, v in infos.items(): # only log scalars if isinstance(v, float) or isinstance(v, int) or (isinstance(v, torch.Tensor) and len(v.shape) == 0): self.direct_info[k] = v def after_clear_stats(self): self.mean_scores.clear() def after_print_stats(self, frame, epoch_num, total_time): if self.ep_infos: for key in self.ep_infos[0]: infotensor = torch.tensor([], device=self.algo.device) for ep_info in self.ep_infos: # handle scalar and zero dimensional tensor infos if not isinstance(ep_info[key], torch.Tensor): ep_info[key] = torch.Tensor([ep_info[key]]) if len(ep_info[key].shape) == 0: ep_info[key] = ep_info[key].unsqueeze(0) infotensor = torch.cat((infotensor, ep_info[key].to(self.algo.device))) value = torch.mean(infotensor) self.writer.add_scalar('Episode/' + key, value, epoch_num) self.ep_infos.clear() for k, v in self.direct_info.items(): self.writer.add_scalar(f'{k}/frame', v, frame) self.writer.add_scalar(f'{k}/iter', v, epoch_num) self.writer.add_scalar(f'{k}/time', v, total_time) if self.mean_scores.current_size > 0: mean_scores = self.mean_scores.get_mean() self.writer.add_scalar('scores/mean', mean_scores, frame) self.writer.add_scalar('scores/iter', mean_scores, epoch_num) self.writer.add_scalar('scores/time', mean_scores, total_time) class RLGPUEnv(vecenv.IVecEnv): def __init__(self, config_name, 
num_actors, **kwargs): self.env = env_configurations.configurations[config_name]['env_creator'](**kwargs) self.use_global_obs = (self.env.num_states > 0) self.full_state = {} self.full_state["obs"] = self.reset() if self.use_global_obs: self.full_state["states"] = self.env.get_state() return def step(self, action): next_obs, reward, is_done, info = self.env.step(action) # todo: improve, return only dictinary self.full_state["obs"] = next_obs if self.use_global_obs: self.full_state["states"] = self.env.get_state() return self.full_state, reward, is_done, info def reset(self, env_ids=None): self.full_state["obs"] = self.env.reset(env_ids) if self.use_global_obs: self.full_state["states"] = self.env.get_state() return self.full_state def get_number_of_agents(self): return self.env.get_number_of_agents() def get_env_info(self): info = {} info['action_space'] = self.env.action_space info['observation_space'] = self.env.observation_space info['amp_observation_space'] = self.env.amp_observation_space if self.use_global_obs: info['state_space'] = self.env.state_space print(info['action_space'], info['observation_space'], info['state_space']) else: print(info['action_space'], info['observation_space']) return info ================================================ FILE: timechamber/utils/torch_jit_utils.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. 
Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import torch import numpy as np from isaacgym.torch_utils import * @torch.jit.script def compute_heading_and_up( torso_rotation, inv_start_rot, to_target, vec0, vec1, up_idx ): # type: (Tensor, Tensor, Tensor, Tensor, Tensor, int) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor] num_envs = torso_rotation.shape[0] target_dirs = normalize(to_target) torso_quat = quat_mul(torso_rotation, inv_start_rot) up_vec = get_basis_vector(torso_quat, vec1).view(num_envs, 3) heading_vec = get_basis_vector(torso_quat, vec0).view(num_envs, 3) up_proj = up_vec[:, up_idx] heading_proj = torch.bmm(heading_vec.view( num_envs, 1, 3), target_dirs.view(num_envs, 3, 1)).view(num_envs) return torso_quat, up_proj, heading_proj, up_vec, heading_vec @torch.jit.script def compute_rot(torso_quat, velocity, ang_velocity, targets, torso_positions): vel_loc = quat_rotate_inverse(torso_quat, velocity) angvel_loc = quat_rotate_inverse(torso_quat, ang_velocity) roll, pitch, yaw = get_euler_xyz(torso_quat) walk_target_angle = torch.atan2(targets[:, 2] 
- torso_positions[:, 2], targets[:, 0] - torso_positions[:, 0]) angle_to_target = walk_target_angle - yaw return vel_loc, angvel_loc, roll, pitch, yaw, angle_to_target @torch.jit.script def quat_axis(q, axis=0): # type: (Tensor, int) -> Tensor basis_vec = torch.zeros(q.shape[0], 3, device=q.device) basis_vec[:, axis] = 1 return quat_rotate(q, basis_vec) """ Normalization and Denormalization of Tensors """ @torch.jit.script def scale_transform(x: torch.Tensor, lower: torch.Tensor, upper: torch.Tensor) -> torch.Tensor: """ Normalizes a given input tensor to a range of [-1, 1]. @note It uses pytorch broadcasting functionality to deal with batched input. Args: x: Input tensor of shape (N, dims). lower: The minimum value of the tensor. Shape (dims,) upper: The maximum value of the tensor. Shape (dims,) Returns: Normalized transform of the tensor. Shape (N, dims) """ # default value of center offset = (lower + upper) * 0.5 # return normalized tensor return 2 * (x - offset) / (upper - lower) @torch.jit.script def unscale_transform(x: torch.Tensor, lower: torch.Tensor, upper: torch.Tensor) -> torch.Tensor: """ Denormalizes a given input tensor from range of [-1, 1] to (lower, upper). @note It uses pytorch broadcasting functionality to deal with batched input. Args: x: Input tensor of shape (N, dims). lower: The minimum value of the tensor. Shape (dims,) upper: The maximum value of the tensor. Shape (dims,) Returns: Denormalized transform of the tensor. Shape (N, dims) """ # default value of center offset = (lower + upper) * 0.5 # return normalized tensor return x * (upper - lower) * 0.5 + offset @torch.jit.script def saturate(x: torch.Tensor, lower: torch.Tensor, upper: torch.Tensor) -> torch.Tensor: """ Clamps a given input tensor to (lower, upper). @note It uses pytorch broadcasting functionality to deal with batched input. Args: x: Input tensor of shape (N, dims). lower: The minimum value of the tensor. Shape (dims,) upper: The maximum value of the tensor. 
Shape (dims,) Returns: Clamped transform of the tensor. Shape (N, dims) """ return torch.max(torch.min(x, upper), lower) """ Rotation conversions """ @torch.jit.script def quat_diff_rad(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: """ Get the difference in radians between two quaternions. Args: a: first quaternion, shape (N, 4) b: second quaternion, shape (N, 4) Returns: Difference in radians, shape (N,) """ b_conj = quat_conjugate(b) mul = quat_mul(a, b_conj) # 2 * torch.acos(torch.abs(mul[:, -1])) return 2.0 * torch.asin( torch.clamp( torch.norm( mul[:, 0:3], p=2, dim=-1), max=1.0) ) @torch.jit.script def local_to_world_space(pos_offset_local: torch.Tensor, pose_global: torch.Tensor): """ Convert a point from the local frame to the global frame Args: pos_offset_local: Point in local frame. Shape: [N, 3] pose_global: The spatial pose of this point. Shape: [N, 7] Returns: Position in the global frame. Shape: [N, 3] """ quat_pos_local = torch.cat( [pos_offset_local, torch.zeros(pos_offset_local.shape[0], 1, dtype=torch.float32, device=pos_offset_local.device)], dim=-1 ) quat_global = pose_global[:, 3:7] quat_global_conj = quat_conjugate(quat_global) pos_offset_global = quat_mul(quat_global, quat_mul(quat_pos_local, quat_global_conj))[:, 0:3] result_pos_gloal = pos_offset_global + pose_global[:, 0:3] return result_pos_gloal # NB: do not make this function jit, since it is passed around as an argument. def normalise_quat_in_pose(pose): """Takes a pose and normalises the quaternion portion of it. Args: pose: shape N, 7 Returns: Pose with normalised quat. 
Shape N, 7 """ pos = pose[:, 0:3] quat = pose[:, 3:7] quat /= torch.norm(quat, dim=-1, p=2).reshape(-1, 1) return torch.cat([pos, quat], dim=-1) @torch.jit.script def my_quat_rotate(q, v): shape = q.shape q_w = q[:, -1] q_vec = q[:, :3] a = v * (2.0 * q_w ** 2 - 1.0).unsqueeze(-1) b = torch.cross(q_vec, v, dim=-1) * q_w.unsqueeze(-1) * 2.0 c = q_vec * \ torch.bmm(q_vec.view(shape[0], 1, 3), v.view( shape[0], 3, 1)).squeeze(-1) * 2.0 return a + b + c @torch.jit.script def quat_to_angle_axis(q): # type: (Tensor) -> Tuple[Tensor, Tensor] # computes axis-angle representation from quaternion q # q must be normalized min_theta = 1e-5 qx, qy, qz, qw = 0, 1, 2, 3 sin_theta = torch.sqrt(1 - q[..., qw] * q[..., qw]) angle = 2 * torch.acos(q[..., qw]) angle = normalize_angle(angle) sin_theta_expand = sin_theta.unsqueeze(-1) axis = q[..., qx:qw] / sin_theta_expand mask = sin_theta > min_theta default_axis = torch.zeros_like(axis) default_axis[..., -1] = 1 angle = torch.where(mask, angle, torch.zeros_like(angle)) mask_expand = mask.unsqueeze(-1) axis = torch.where(mask_expand, axis, default_axis) return angle, axis @torch.jit.script def angle_axis_to_exp_map(angle, axis): # type: (Tensor, Tensor) -> Tensor # compute exponential map from axis-angle angle_expand = angle.unsqueeze(-1) exp_map = angle_expand * axis return exp_map @torch.jit.script def quat_to_exp_map(q): # type: (Tensor) -> Tensor # compute exponential map from quaternion # q must be normalized angle, axis = quat_to_angle_axis(q) exp_map = angle_axis_to_exp_map(angle, axis) return exp_map @torch.jit.script def quat_to_tan_norm(q): # type: (Tensor) -> Tensor # represents a rotation using the tangent and normal vectors ref_tan = torch.zeros_like(q[..., 0:3]) ref_tan[..., 0] = 1 tan = my_quat_rotate(q, ref_tan) ref_norm = torch.zeros_like(q[..., 0:3]) ref_norm[..., -1] = 1 norm = my_quat_rotate(q, ref_norm) norm_tan = torch.cat([tan, norm], dim=len(tan.shape) - 1) return norm_tan @torch.jit.script def 
euler_xyz_to_exp_map(roll, pitch, yaw): # type: (Tensor, Tensor, Tensor) -> Tensor q = quat_from_euler_xyz(roll, pitch, yaw) exp_map = quat_to_exp_map(q) return exp_map @torch.jit.script def exp_map_to_angle_axis(exp_map): min_theta = 1e-5 angle = torch.norm(exp_map, dim=-1) angle_exp = torch.unsqueeze(angle, dim=-1) axis = exp_map / angle_exp angle = normalize_angle(angle) default_axis = torch.zeros_like(exp_map) default_axis[..., -1] = 1 mask = angle > min_theta angle = torch.where(mask, angle, torch.zeros_like(angle)) mask_expand = mask.unsqueeze(-1) axis = torch.where(mask_expand, axis, default_axis) return angle, axis @torch.jit.script def exp_map_to_quat(exp_map): angle, axis = exp_map_to_angle_axis(exp_map) q = quat_from_angle_axis(angle, axis) return q @torch.jit.script def slerp(q0, q1, t): # type: (Tensor, Tensor, Tensor) -> Tensor qx, qy, qz, qw = 0, 1, 2, 3 cos_half_theta = q0[..., qw] * q1[..., qw] \ + q0[..., qx] * q1[..., qx] \ + q0[..., qy] * q1[..., qy] \ + q0[..., qz] * q1[..., qz] neg_mask = cos_half_theta < 0 q1 = q1.clone() q1[neg_mask] = -q1[neg_mask] cos_half_theta = torch.abs(cos_half_theta) cos_half_theta = torch.unsqueeze(cos_half_theta, dim=-1) half_theta = torch.acos(cos_half_theta); sin_half_theta = torch.sqrt(1.0 - cos_half_theta * cos_half_theta); ratioA = torch.sin((1 - t) * half_theta) / sin_half_theta; ratioB = torch.sin(t * half_theta) / sin_half_theta; new_q_x = ratioA * q0[..., qx:qx+1] + ratioB * q1[..., qx:qx+1] new_q_y = ratioA * q0[..., qy:qy+1] + ratioB * q1[..., qy:qy+1] new_q_z = ratioA * q0[..., qz:qz+1] + ratioB * q1[..., qz:qz+1] new_q_w = ratioA * q0[..., qw:qw+1] + ratioB * q1[..., qw:qw+1] cat_dim = len(new_q_w.shape) - 1 new_q = torch.cat([new_q_x, new_q_y, new_q_z, new_q_w], dim=cat_dim) new_q = torch.where(torch.abs(sin_half_theta) < 0.001, 0.5 * q0 + 0.5 * q1, new_q) new_q = torch.where(torch.abs(cos_half_theta) >= 1, q0, new_q) return new_q @torch.jit.script def calc_heading(q): # type: (Tensor) -> Tensor # 
calculate heading direction from quaternion # the heading is the direction on the xy plane # q must be normalized ref_dir = torch.zeros_like(q[..., 0:3]) ref_dir[..., 0] = 1 rot_dir = my_quat_rotate(q, ref_dir) heading = torch.atan2(rot_dir[..., 1], rot_dir[..., 0]) return heading @torch.jit.script def calc_heading_quat(q): # type: (Tensor) -> Tensor # calculate heading rotation from quaternion # the heading is the direction on the xy plane # q must be normalized heading = calc_heading(q) axis = torch.zeros_like(q[..., 0:3]) axis[..., 2] = 1 heading_q = quat_from_angle_axis(heading, axis) return heading_q @torch.jit.script def calc_heading_quat_inv(q): # type: (Tensor) -> Tensor # calculate heading rotation from quaternion # the heading is the direction on the xy plane # q must be normalized heading = calc_heading(q) axis = torch.zeros_like(q[..., 0:3]) axis[..., 2] = 1 heading_q = quat_from_angle_axis(-heading, axis) return heading_q # EOF ================================================ FILE: timechamber/utils/torch_utils.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import torch import numpy as np from isaacgym.torch_utils import * @torch.jit.script def quat_to_angle_axis(q): # type: (Tensor) -> Tuple[Tensor, Tensor] # computes axis-angle representation from quaternion q # q must be normalized min_theta = 1e-5 qx, qy, qz, qw = 0, 1, 2, 3 sin_theta = torch.sqrt(1 - q[..., qw] * q[..., qw]) angle = 2 * torch.acos(q[..., qw]) angle = normalize_angle(angle) sin_theta_expand = sin_theta.unsqueeze(-1) axis = q[..., qx:qw] / sin_theta_expand mask = torch.abs(sin_theta) > min_theta default_axis = torch.zeros_like(axis) default_axis[..., -1] = 1 angle = torch.where(mask, angle, torch.zeros_like(angle)) mask_expand = mask.unsqueeze(-1) axis = torch.where(mask_expand, axis, default_axis) return angle, axis @torch.jit.script def angle_axis_to_exp_map(angle, axis): # type: (Tensor, Tensor) -> Tensor # compute exponential map from axis-angle angle_expand = angle.unsqueeze(-1) exp_map = angle_expand * axis return exp_map @torch.jit.script def quat_to_exp_map(q): # type: (Tensor) -> Tensor # compute exponential map from quaternion # q must be normalized angle, axis = quat_to_angle_axis(q) exp_map = angle_axis_to_exp_map(angle, axis) return exp_map @torch.jit.script def 
quat_to_tan_norm(q): # type: (Tensor) -> Tensor # represents a rotation using the tangent and normal vectors ref_tan = torch.zeros_like(q[..., 0:3]) ref_tan[..., 0] = 1 tan = quat_rotate(q, ref_tan) ref_norm = torch.zeros_like(q[..., 0:3]) ref_norm[..., -1] = 1 norm = quat_rotate(q, ref_norm) norm_tan = torch.cat([tan, norm], dim=len(tan.shape) - 1) return norm_tan @torch.jit.script def euler_xyz_to_exp_map(roll, pitch, yaw): # type: (Tensor, Tensor, Tensor) -> Tensor q = quat_from_euler_xyz(roll, pitch, yaw) exp_map = quat_to_exp_map(q) return exp_map @torch.jit.script def exp_map_to_angle_axis(exp_map): min_theta = 1e-5 angle = torch.norm(exp_map, dim=-1) angle_exp = torch.unsqueeze(angle, dim=-1) axis = exp_map / angle_exp angle = normalize_angle(angle) default_axis = torch.zeros_like(exp_map) default_axis[..., -1] = 1 mask = torch.abs(angle) > min_theta angle = torch.where(mask, angle, torch.zeros_like(angle)) mask_expand = mask.unsqueeze(-1) axis = torch.where(mask_expand, axis, default_axis) return angle, axis @torch.jit.script def exp_map_to_quat(exp_map): angle, axis = exp_map_to_angle_axis(exp_map) q = quat_from_angle_axis(angle, axis) return q @torch.jit.script def slerp(q0, q1, t): # type: (Tensor, Tensor, Tensor) -> Tensor cos_half_theta = torch.sum(q0 * q1, dim=-1) neg_mask = cos_half_theta < 0 q1 = q1.clone() q1[neg_mask] = -q1[neg_mask] cos_half_theta = torch.abs(cos_half_theta) cos_half_theta = torch.unsqueeze(cos_half_theta, dim=-1) half_theta = torch.acos(cos_half_theta); sin_half_theta = torch.sqrt(1.0 - cos_half_theta * cos_half_theta); ratioA = torch.sin((1 - t) * half_theta) / sin_half_theta; ratioB = torch.sin(t * half_theta) / sin_half_theta; new_q = ratioA * q0 + ratioB * q1 new_q = torch.where(torch.abs(sin_half_theta) < 0.001, 0.5 * q0 + 0.5 * q1, new_q) new_q = torch.where(torch.abs(cos_half_theta) >= 1, q0, new_q) return new_q @torch.jit.script def calc_heading(q): # type: (Tensor) -> Tensor # calculate heading direction from quaternion 
# the heading is the direction on the xy plane # q must be normalized ref_dir = torch.zeros_like(q[..., 0:3]) ref_dir[..., 0] = 1 rot_dir = quat_rotate(q, ref_dir) heading = torch.atan2(rot_dir[..., 1], rot_dir[..., 0]) return heading @torch.jit.script def calc_heading_quat(q): # type: (Tensor) -> Tensor # calculate heading rotation from quaternion # the heading is the direction on the xy plane # q must be normalized heading = calc_heading(q) axis = torch.zeros_like(q[..., 0:3]) axis[..., 2] = 1 heading_q = quat_from_angle_axis(heading, axis) return heading_q @torch.jit.script def calc_heading_quat_inv(q): # type: (Tensor) -> Tensor # calculate heading rotation from quaternion # the heading is the direction on the xy plane # q must be normalized heading = calc_heading(q) axis = torch.zeros_like(q[..., 0:3]) axis[..., 2] = 1 heading_q = quat_from_angle_axis(-heading, axis) return heading_q ================================================ FILE: timechamber/utils/utils.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # python import numpy as np import torch import random import os from collections import OrderedDict import time from isaacgym import gymapi from isaacgym import gymutil def set_np_formatting(): """ formats numpy print """ np.set_printoptions(edgeitems=30, infstr='inf', linewidth=4000, nanstr='nan', precision=2, suppress=False, threshold=10000, formatter=None) def set_seed(seed, torch_deterministic=False, rank=0): """ set seed across modules """ if seed == -1 and torch_deterministic: seed = 42 + rank elif seed == -1: seed = np.random.randint(0, 10000) else: seed = seed + rank print("Setting seed: {}".format(seed)) random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) os.environ['PYTHONHASHSEED'] = str(seed) torch.cuda.manual_seed(seed) torch.cuda.manual_seed_all(seed) if torch_deterministic: # refer to https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' torch.backends.cudnn.benchmark = False torch.backends.cudnn.deterministic = True torch.use_deterministic_algorithms(True) else: torch.backends.cudnn.benchmark = True torch.backends.cudnn.deterministic = False return seed def load_check(checkpoint, normalize_input: bool, 
normalize_value: bool): extras = OrderedDict() if normalize_value and 'value_mean_std.running_mean' not in checkpoint['model'].keys(): extras['value_mean_std.running_mean'] = checkpoint['reward_mean_std']['running_mean'] extras['value_mean_std.running_var'] = checkpoint['reward_mean_std']['running_var'] extras['value_mean_std.count'] = checkpoint['reward_mean_std']['count'] if normalize_input and 'running_mean_std.running_mean' not in checkpoint['model'].keys(): extras['running_mean_std.running_mean'] = checkpoint['running_mean_std']['running_mean'] extras['running_mean_std.running_var'] = checkpoint['running_mean_std']['running_var'] extras['running_mean_std.count'] = checkpoint['running_mean_std']['count'] extras.update(checkpoint['model']) checkpoint['model'] = extras return checkpoint def safe_filesystem_op(func, *args, **kwargs): """ This is to prevent spurious crashes related to saving checkpoints or restoring from checkpoints in a Network Filesystem environment (i.e. NGC cloud or SLURM) """ num_attempts = 5 for attempt in range(num_attempts): try: return func(*args, **kwargs) except Exception as exc: print(f'Exception {exc} when trying to execute {func} with args:{args} and kwargs:{kwargs}...') wait_sec = 2 ** attempt print(f'Waiting {wait_sec} before trying again...') time.sleep(wait_sec) raise RuntimeError(f'Could not execute {func}, give up after {num_attempts} attempts...') def safe_load(filename, device=None): if device is not None: return safe_filesystem_op(torch.load, filename, map_location=device) else: return safe_filesystem_op(torch.load, filename) def load_checkpoint(filename, device=None): print("=> loading checkpoint '{}'".format(filename)) state = safe_load(filename, device=device) return state def print_actor_info(gym, env, actor_handle): name = gym.get_actor_name(env, actor_handle) body_names = gym.get_actor_rigid_body_names(env, actor_handle) body_dict = gym.get_actor_rigid_body_dict(env, actor_handle) joint_names = 
gym.get_actor_joint_names(env, actor_handle) joint_dict = gym.get_actor_joint_dict(env, actor_handle) dof_names = gym.get_actor_dof_names(env, actor_handle) dof_dict = gym.get_actor_dof_dict(env, actor_handle) print() print("===== Actor: %s =======================================" % name) print("\nBodies") print(body_names) print(body_dict) print("\nJoints") print(joint_names) print(joint_dict) print("\n Degrees Of Freedom (DOFs)") print(dof_names) print(dof_dict) print() # Get body state information body_states = gym.get_actor_rigid_body_states( env, actor_handle, gymapi.STATE_ALL) # Print some state slices print("Poses from Body State:") print(body_states['pose']) # print just the poses print("\nVelocities from Body State:") print(body_states['vel']) # print just the velocities print() # iterate through bodies and print name and position body_positions = body_states['pose']['p'] for i in range(len(body_names)): print("Body '%s' has position" % body_names[i], body_positions[i]) print("\nDOF states:") # get DOF states dof_states = gym.get_actor_dof_states(env, actor_handle, gymapi.STATE_ALL) # print some state slices # Print all states for each degree of freedom print(dof_states) print() # iterate through DOFs and print name and position dof_positions = dof_states['pos'] for i in range(len(dof_names)): print("DOF '%s' has position" % dof_names[i], dof_positions[i]) def print_asset_info(asset, name, gym): print("======== Asset info %s: ========" % (name)) num_bodies = gym.get_asset_rigid_body_count(asset) num_joints = gym.get_asset_joint_count(asset) num_dofs = gym.get_asset_dof_count(asset) print("Got %d bodies, %d joints, and %d DOFs" % (num_bodies, num_joints, num_dofs)) # Iterate through bodies print("Bodies:") for i in range(num_bodies): name = gym.get_asset_rigid_body_name(asset, i) print(" %2d: '%s'" % (i, name)) # Iterate through joints print("Joints:") for i in range(num_joints): name = gym.get_asset_joint_name(asset, i) type = 
gym.get_asset_joint_type(asset, i) type_name = gym.get_joint_type_string(type) print(" %2d: '%s' (%s)" % (i, name, type_name)) # iterate through degrees of freedom (DOFs) print("DOFs:") for i in range(num_dofs): name = gym.get_asset_dof_name(asset, i) type = gym.get_asset_dof_type(asset, i) type_name = gym.get_dof_type_string(type) print(" %2d: '%s' (%s)" % (i, name, type_name)) # EOF ================================================ FILE: timechamber/utils/vec_task.py ================================================ # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. from gym import spaces from isaacgym import gymtorch from isaacgym.torch_utils import to_torch import torch import numpy as np # VecEnv Wrapper for RL training class VecTask(): def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): self.task = task self.num_environments = task.num_envs self.num_agents = 1 # used for multi-agent environments self.num_observations = task.num_obs self.num_states = task.num_states self.num_actions = task.num_actions self.obs_space = spaces.Box(np.ones(self.num_obs) * -np.Inf, np.ones(self.num_obs) * np.Inf) self.state_space = spaces.Box(np.ones(self.num_states) * -np.Inf, np.ones(self.num_states) * np.Inf) self.act_space = spaces.Box(np.ones(self.num_actions) * -1., np.ones(self.num_actions) * 1.) 
self.clip_obs = clip_observations self.clip_actions = clip_actions self.rl_device = rl_device print("RL device: ", rl_device) def step(self, actions): raise NotImplementedError def reset(self): raise NotImplementedError def get_number_of_agents(self): return self.num_agents @property def observation_space(self): return self.obs_space @property def action_space(self): return self.act_space @property def num_envs(self): return self.num_environments @property def num_acts(self): return self.num_actions @property def num_obs(self): return self.num_observations # C++ CPU Class class VecTaskCPU(VecTask): def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0): super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions) self.sync_frame_time = sync_frame_time def step(self, actions): actions = actions.cpu().numpy() self.task.render(self.sync_frame_time) obs, rewards, resets, extras = self.task.step(np.clip(actions, -self.clip_actions, self.clip_actions)) return (to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device), to_torch(rewards, dtype=torch.float, device=self.rl_device), to_torch(resets, dtype=torch.uint8, device=self.rl_device), []) def reset(self): actions = 0.01 * (1 - 2 * np.random.rand(self.num_envs, self.num_actions)).astype('f') # step the simulator obs, rewards, resets, extras = self.task.step(actions) return to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device) # C++ GPU Class class VecTaskGPU(VecTask): def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions) self.obs_tensor = gymtorch.wrap_tensor(self.task.obs_tensor, counts=(self.task.num_envs, self.task.num_obs)) self.rewards_tensor = gymtorch.wrap_tensor(self.task.rewards_tensor, counts=(self.task.num_envs,)) 
self.resets_tensor = gymtorch.wrap_tensor(self.task.resets_tensor, counts=(self.task.num_envs,)) def step(self, actions): self.task.render(False) actions_clipped = torch.clamp(actions, -self.clip_actions, self.clip_actions) actions_tensor = gymtorch.unwrap_tensor(actions_clipped) self.task.step(actions_tensor) return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs), self.rewards_tensor, self.resets_tensor, [] def reset(self): actions = 0.01 * (1 - 2 * torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)) actions_tensor = gymtorch.unwrap_tensor(actions) # step the simulator self.task.step(actions_tensor) return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs) # Python CPU/GPU Class class VecTaskPython(VecTask): def get_state(self): return torch.clamp(self.task.states_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) def step(self, actions): actions_tensor = torch.clamp(actions, -self.clip_actions, self.clip_actions) self.task.step(actions_tensor) return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device), self.task.rew_buf.to(self.rl_device), self.task.reset_buf.to(self.rl_device), self.task.extras def reset(self): actions = 0.01 * (1 - 2 * torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)) # step the simulator self.task.step(actions) return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) ================================================ FILE: timechamber/utils/vec_task_wrappers.py ================================================ # Copyright (c) 2018-2022, NVIDIA Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # 2. 
Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from gym import spaces

import numpy as np
import torch

from timechamber.utils.vec_task import VecTaskCPU, VecTaskGPU, VecTaskPython


class VecTaskCPUWrapper(VecTaskCPU):
    """Thin pass-through wrapper around the CPU vec-task backend."""

    def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0):
        super().__init__(task, rl_device, sync_frame_time, clip_observations, clip_actions)


class VecTaskGPUWrapper(VecTaskGPU):
    """Thin pass-through wrapper around the GPU vec-task backend."""

    def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0):
        super().__init__(task, rl_device, clip_observations, clip_actions)


class VecTaskPythonWrapper(VecTaskPython):
    """Python vec-task wrapper that optionally exposes an AMP observation space."""

    def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0, AMP=False):
        super().__init__(task, rl_device, clip_observations, clip_actions)

        if AMP:
            # AMP tasks report their discriminator-observation size; build an
            # unbounded Box space over it. Non-AMP tasks expose None.
            amp_obs_count = task.get_num_amp_obs()
            self._amp_obs_space = spaces.Box(np.ones(amp_obs_count) * -np.Inf,
                                             np.ones(amp_obs_count) * np.Inf)
        else:
            self._amp_obs_space = None

    def reset(self, env_ids=None):
        """Reset the given envs (all when None) and return clipped observations."""
        self.task.reset(env_ids)
        clipped = torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs)
        return clipped.to(self.rl_device)

    @property
    def amp_observation_space(self):
        return self._amp_obs_space

    def fetch_amp_obs_demo(self, num_samples):
        """Delegate demo-observation sampling to the underlying AMP task."""
        return self.task.fetch_amp_obs_demo(num_samples)